diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts
index 859f1ff4b..7798fc554 100644
--- a/apps/cli/src/commands/eval/artifact-writer.ts
+++ b/apps/cli/src/commands/eval/artifact-writer.ts
@@ -94,6 +94,7 @@ export function buildIndexArtifactEntry(
     outputPath?: string;
     answerPath?: string;
     transcriptPath?: string;
+    rawProviderLogPath?: string;
     inputPath?: string;
     responsePath?: string;
     taskBundle?: MaterializedTaskBundlePaths;
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 43637c598..fdf4b1869 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -361,6 +361,16 @@ export function trimOutputMessages(
   return output;
 }
 
+/** Copy of `result` for the JSONL row, with `output` passed through `trimOutputMessages`. */
+export function prepareResultForJsonl(
+  result: EvaluationResult,
+  options: { readonly outputMessages: number | 'all' },
+): EvaluationResult {
+  const { outputMessages: messageLimit } = options;
+  const output = trimOutputMessages(result.output, messageLimit);
+  return { ...result, output };
+}
+
 function normalizeOptions(
   rawOptions: Record<string, unknown>,
   config?: Awaited<ReturnType<typeof loadTsConfig>>,
@@ -1043,11 +1053,7 @@ async function runSingleEvalFile(params: {
       // Each message is trimmed to { role, content } only (no toolCalls, startTime, etc.).
       // Full output with tool calls goes to OTel.
       const resultWithMetadata = withSourceMetadata(result, testFilePath, options);
-      const trimmedOutput = trimOutputMessages(resultWithMetadata.output, options.outputMessages);
-      const trimmedResult: EvaluationResult = {
-        ...resultWithMetadata,
-        output: trimmedOutput,
-      };
+      const trimmedResult = prepareResultForJsonl(resultWithMetadata, options);
       await outputWriter.append(trimmedResult);
 
       // Export to OTel if exporter is configured (skip batch export when streaming is active)
diff --git a/apps/cli/src/commands/inspect/utils.ts b/apps/cli/src/commands/inspect/utils.ts
index 6ea549678..574e9de81 100644
--- a/apps/cli/src/commands/inspect/utils.ts
+++ b/apps/cli/src/commands/inspect/utils.ts
@@ -8,7 +8,7 @@ import {
   resolveExistingRunPrimaryPath,
   resolveWorkspaceOrFilePath,
 } from '../eval/result-layout.js';
-import { loadManifestResults } from '../results/manifest.js';
+import { loadLightweightResults, loadManifestResults } from '../results/manifest.js';
 import { ResultRowSchemaError, normalizeResultRow } from '../results/result-row-schema.js';
 
 // ANSI color codes (no dependency needed)
@@ -636,7 +636,7 @@ export function listResultFilesFromRunsDir(runsDir: string, limit?: number): Res
   for (const { filePath, displayName, runId } of limited) {
     try {
       const fileStat = statSync(filePath);
-      const results = loadResultFile(filePath);
+      const results = loadLightweightResults(filePath);
 
       const testCount = results.length;
       const passCount = results.filter((r) => r.score >= DEFAULT_THRESHOLD).length;
diff --git a/apps/cli/src/commands/results/combine-run.ts b/apps/cli/src/commands/results/combine-run.ts
index 2e97ff711..e77af2e5d 100644
--- a/apps/cli/src/commands/results/combine-run.ts
+++ b/apps/cli/src/commands/results/combine-run.ts
@@ -20,7 +20,12 @@ import {
 } from 'node:fs';
 import path from 'node:path';
 
-import type { EvaluationResult } from '@agentv/core';
+import type {
+  EvaluationResult,
+  ResultArtifactPointerWire,
+  ResultArtifactPointersWire,
+  TranscriptArtifactPointerWire,
+} from '@agentv/core';
 
 import {
   type BenchmarkArtifact,
@@ -305,11 +310,14 @@ function toRunId(cwd: string, runDir: string): string {
 }
 
 const MANIFEST_PATH_FIELDS = [
+  'artifact_dir',
   'grading_path',
   'timing_path',
   'input_path',
   'output_path',
   'response_path',
+  'transcript_path',
+  'raw_provider_log_path',
   'task_dir',
   'eval_path',
   'targets_path',
@@ -317,6 +325,15 @@ const MANIFEST_PATH_FIELDS = [
   'graders_path',
 ] as const;
 
+/** Fallback family segment for a pointer's `key` when the pointer carries no `family`. */
+const POINTER_FAMILIES = { trace: 'traces', transcript: 'transcripts' } as const;
+
+/** True when the path is not absolute and has no `..` segment (`/` or `\` separated). */
+function isSafeRelativeArtifactPath(relativePath: string): boolean {
+  const hasParentSegment = /(?:^|[\\/])\.\.(?:[\\/]|$)/.test(relativePath);
+  return !(path.isAbsolute(relativePath) || hasParentSegment);
+}
+
 function copyReferencedArtifact(
   sourceBaseDir: string,
   outputDir: string,
@@ -324,7 +341,7 @@ function copyReferencedArtifact(
   relativePath: string | undefined,
 ): string | undefined {
   if (!relativePath) return undefined;
-  if (path.isAbsolute(relativePath) || relativePath.split(/[\\/]+/).includes('..')) {
+  if (!isSafeRelativeArtifactPath(relativePath)) {
     throw new Error(`Unsafe artifact path in source manifest: ${relativePath}`);
   }
   const sourcePath = path.join(sourceBaseDir, relativePath);
@@ -343,6 +360,71 @@ function copyReferencedArtifact(
   return rewritten;
 }
 
+/**
+ * Re-targets an artifact pointer at the path returned by `copyReferencedArtifact`.
+ *
+ * @param pointerName - Selects the `POINTER_FAMILIES` fallback used when `pointer.family` is unset.
+ * @returns undefined for a missing pointer; a shallow copy of the pointer when its file is absent
+ *   or nothing was copied; otherwise a copy whose `path` and `key` reference the copied file.
+ * @throws Error when `pointer.path` is absolute or contains a `..` segment.
+ */
+function rewriteArtifactPointer(
+  pointerName: keyof typeof POINTER_FAMILIES,
+  pointer: ResultArtifactPointerWire | undefined,
+  sourceBaseDir: string,
+  outputDir: string,
+  sourceIndex: number,
+): ResultArtifactPointerWire | undefined {
+  if (!pointer) return undefined;
+
+  const originalPath = pointer.path;
+  if (!isSafeRelativeArtifactPath(originalPath)) {
+    throw new Error(`Unsafe artifact path in source manifest: ${originalPath}`);
+  }
+
+  // A pointer whose file is not on disk is carried over untouched rather than copied.
+  const copiedPath = existsSync(path.join(sourceBaseDir, originalPath))
+    ? copyReferencedArtifact(sourceBaseDir, outputDir, sourceIndex, originalPath)
+    : undefined;
+  if (!copiedPath) return { ...pointer };
+
+  const keyPrefix = pointer.family ?? POINTER_FAMILIES[pointerName];
+  return { ...pointer, path: copiedPath, key: path.posix.join(keyPrefix, copiedPath) };
+}
+
+/** Transcript flavour of `rewriteArtifactPointer`; the cast restores the transcript wire type. */
+function rewriteTranscriptArtifactPointer(
+  pointer: TranscriptArtifactPointerWire | undefined,
+  sourceBaseDir: string,
+  outputDir: string,
+  sourceIndex: number,
+): TranscriptArtifactPointerWire | undefined {
+  const out = rewriteArtifactPointer('transcript', pointer, sourceBaseDir, outputDir, sourceIndex);
+  return out as TranscriptArtifactPointerWire | undefined;
+}
+
+/**
+ * Rewrites the `trace` and `transcript` pointers of a manifest row; every other key of
+ * `pointers` is carried over unchanged. Returns undefined when the row has no pointers.
+ */
+function rewriteArtifactPointers(
+  pointers: ResultArtifactPointersWire | undefined,
+  sourceBaseDir: string,
+  outputDir: string,
+  sourceIndex: number,
+): ResultArtifactPointersWire | undefined {
+  if (!pointers) return undefined;
+  const { trace: oldTrace, transcript: oldTranscript } = pointers;
+  const trace = rewriteArtifactPointer('trace', oldTrace, sourceBaseDir, outputDir, sourceIndex);
+  const transcript = rewriteTranscriptArtifactPointer(
+    oldTranscript,
+    sourceBaseDir,
+    outputDir,
+    sourceIndex,
+  );
+  return { ...pointers, trace, transcript };
+}
+
 function rewriteAndCopyRecord(row: SelectedRow, outputDir: string): ResultManifestRecord {
   const sourceBaseDir = path.dirname(row.source.manifestPath);
   const rewritten: Record<string, unknown> = { ...row.record };
@@ -354,6 +436,20 @@ function rewriteAndCopyRecord(row: SelectedRow, outputDir: string): ResultManife
       row.record[field],
     );
   }
+  const artifactPointers = rewriteArtifactPointers(
+    row.record.artifact_pointers,
+    sourceBaseDir,
+    outputDir,
+    row.source.index,
+  );
+  rewritten.artifact_pointers = artifactPointers;
+  if (
+    row.record.transcript_path &&
+    rewritten.transcript_path === row.record.transcript_path &&
+    artifactPointers?.transcript?.path
+  ) {
+    rewritten.transcript_path = artifactPointers.transcript.path;
+  }
   return rewritten as unknown as ResultManifestRecord;
 }
 
diff --git a/apps/cli/src/commands/results/export.ts b/apps/cli/src/commands/results/export.ts
index 76139169e..69e3f22d7 100644
--- a/apps/cli/src/commands/results/export.ts
+++ b/apps/cli/src/commands/results/export.ts
@@ -21,14 +21,22 @@
  *   - To add new per-test workspace files, add them under each test directory.
  */
 
+import { readFileSync } from 'node:fs';
 import path from 'node:path';
 
-import { command, oneOf, option, optional, positional, string } from 'cmd-ts';
+import { command, flag, oneOf, option, optional, positional, string } from 'cmd-ts';
 
-import type { EvaluationResult, ExportDuplicatePolicy } from '@agentv/core';
+import type { EvaluationResult, ExportDuplicatePolicy, IndexArtifactEntry } from '@agentv/core';
 
 import { parseJsonlResults, writeArtifactsFromResults } from '../eval/artifact-writer.js';
 import { RESULT_INDEX_FILENAME } from '../eval/result-layout.js';
+import { loadManifestResults } from './manifest.js';
+import {
+  type ProjectionBundle,
+  buildProjectionBundle,
+  serializeProjectionBundle,
+  writeProjectionBundle,
+} from './projection-bundle.js';
 import { loadResults as loadSharedResults, resolveSourceFile } from './shared.js';
 
 // ── Export logic ─────────────────────────────────────────────────────────
@@ -92,6 +100,36 @@ export async function loadExportSource(
   return { sourceFile, results };
 }
 
+/** Parses each non-blank line of the index file at `indexPath` as one `IndexArtifactEntry`. */
+function readIndexArtifactEntries(indexPath: string): IndexArtifactEntry[] {
+  return readFileSync(indexPath, 'utf8').split(/\r?\n/).flatMap((rawLine) => {
+    const text = rawLine.trim();
+    return text ? [JSON.parse(text) as IndexArtifactEntry] : [];
+  });
+}
+
+/**
+ * Builds a projection bundle from the index file found in `options.outputDir`.
+ *
+ * The index is read twice — as raw index records and as hydrated results — and both are passed
+ * to `buildProjectionBundle` with `artifactRefStatus: 'emitted'`.
+ */
+export function buildProjectionBundleFromExportedIndex(options: {
+  readonly sourceFile: string;
+  readonly outputDir: string;
+  readonly cwd?: string;
+  readonly includeRawContent?: boolean;
+  readonly duplicatePolicy?: ExportDuplicatePolicy;
+}): ProjectionBundle {
+  const { sourceFile, outputDir, cwd, includeRawContent, duplicatePolicy } = options;
+  const indexPath = path.join(outputDir, RESULT_INDEX_FILENAME);
+  const indexRecords = readIndexArtifactEntries(indexPath);
+  const emittedResults = loadManifestResults(indexPath);
+  const runId = deriveExportRunId(sourceFile);
+  const shared = { sourceFile, runId, cwd, duplicatePolicy, includeRawContent, indexRecords };
+  return buildProjectionBundle(emittedResults, { ...shared, artifactRefStatus: 'emitted' });
+}
+
 // ── CLI command ──────────────────────────────────────────────────────────
 
 export const resultsExportCommand = command({
@@ -122,10 +160,34 @@ export const resultsExportCommand = command({
       description:
         'How to handle duplicate projection identities in the output: update (default), skip, or error',
     }),
+    projectionBundle: flag({
+      long: 'projection-bundle',
+      description: 'Write a vendor-neutral projection_bundle.json alongside exported artifacts',
+    }),
+    dryRun: flag({
+      long: 'dry-run',
+      description: 'Print deterministic projection bundle JSON without writing export artifacts',
+    }),
+    includeRawContent: flag({
+      long: 'include-raw-content',
+      description:
+        'Include raw prompt, output, and tool payload content in the projection bundle (off by default)',
+    }),
   },
-  handler: async ({ source, out, dir, duplicatePolicy }) => {
+  handler: async ({
+    source,
+    out,
+    dir,
+    duplicatePolicy,
+    projectionBundle,
+    dryRun,
+    includeRawContent,
+  }) => {
     const cwd = dir ?? process.cwd();
     const policy = (duplicatePolicy ?? 'update') as ExportDuplicatePolicy;
+    const shouldWriteProjectionBundle = projectionBundle;
+    const shouldDryRun = dryRun;
+    const shouldIncludeRawContent = includeRawContent;
 
     try {
       const { sourceFile, results } = await loadExportSource(source, cwd);
@@ -136,14 +198,44 @@ export const resultsExportCommand = command({
           : path.resolve(cwd, out)
         : deriveOutputDir(cwd, sourceFile);
 
+      const buildBundle = () =>
+        buildProjectionBundle(results, {
+          sourceFile,
+          runId: deriveExportRunId(sourceFile),
+          cwd,
+          duplicatePolicy: policy,
+          includeRawContent: shouldIncludeRawContent,
+        });
+
+      if (shouldDryRun) {
+        process.stdout.write(serializeProjectionBundle(buildBundle()));
+        return;
+      }
+
       await writeArtifactsFromResults(results, outputDir, {
         evalFile: sourceFile,
         runId: deriveExportRunId(sourceFile),
         duplicatePolicy: policy,
       });
 
+      const bundlePath = shouldWriteProjectionBundle
+        ? await writeProjectionBundle(
+            buildProjectionBundleFromExportedIndex({
+              sourceFile,
+              outputDir,
+              cwd,
+              duplicatePolicy: policy,
+              includeRawContent: shouldIncludeRawContent,
+            }),
+            outputDir,
+          )
+        : undefined;
+
       // Report exported test IDs
       console.log(`Exported ${results.length} test(s) to ${outputDir}`);
+      if (bundlePath) {
+        console.log(`Projection bundle written to ${bundlePath}`);
+      }
       for (const result of results) {
         console.log(`  ${result.testId ?? 'unknown'}`);
       }
diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts
index 65044552e..253e49127 100644
--- a/apps/cli/src/commands/results/manifest.ts
+++ b/apps/cli/src/commands/results/manifest.ts
@@ -3,6 +3,7 @@ import path from 'node:path';
 
 import {
   type EvaluationResult,
+  type ResultArtifactPointersWire,
   type TraceSummary,
   type TranscriptJsonLine,
   buildTraceFromMessages,
@@ -43,6 +44,10 @@ export interface ResultManifestRecord {
   readonly output_path?: string;
   readonly answer_path?: string;
   readonly transcript_path?: string;
+  readonly raw_provider_log_path?: string;
+  readonly artifact_pointers?: ResultArtifactPointersWire;
+  readonly transcript?: ArtifactPointer;
+  readonly artifacts?: ArtifactPointerMap;
   readonly response_path?: string;
   readonly artifact_dir?: string;
   readonly task_dir?: string;
@@ -53,6 +58,36 @@ export interface ResultManifestRecord {
   readonly metadata?: Record<string, unknown>;
 }
 
+export type ArtifactPointer =
+  | string // bare form: the string itself is treated as the path
+  | {
+      readonly path?: unknown; // 1st key consulted by artifactPointerPath
+      readonly artifact_path?: unknown; // 2nd key consulted
+      readonly relative_path?: unknown; // 3rd key consulted
+      readonly ref?: unknown;
+      readonly storage?: unknown;
+      readonly uri?: unknown;
+      readonly href?: unknown;
+      readonly [key: string]: unknown;
+    };
+
+export interface ArtifactPointerMap {
+  readonly transcript_path?: string; // resolveTranscriptPath reads this before `transcript`
+  readonly answer_path?: string;
+  readonly transcript?: ArtifactPointer; // fallback behind the record's top-level `transcript`
+  readonly answer?: ArtifactPointer;
+  readonly [key: string]: unknown;
+}
+
+export interface ManifestHydrationOptions {
+  /**
+   * Defaults to true for report/inspect consumers that need a trace projection; only an
+   * explicit `false` makes `hydrateTrace` skip the transcript file. Dashboard detail routes set
+   * this false so transcript bodies are loaded only by the explicit transcript artifact endpoint.
+   */
+  readonly hydrateTranscriptTrace?: boolean;
+}
+
 function parseJsonlLines<T>(content: string): T[] {
   return content
     .split(/\r?\n/)
@@ -114,6 +149,33 @@ function readOptionalJson<T>(baseDir: string, relativePath: string | undefined):
   }
 }
 
+function nonEmptyString(value: unknown): string | undefined {
+  return typeof value !== 'string' || value.trim() === '' ? undefined : value; // blank → none
+}
+
+/**
+ * Pulls a usable path out of an artifact pointer: a string pointer is the path itself, an
+ * object pointer is probed at `path`, `artifact_path`, then `relative_path` (blanks skipped).
+ */
+function artifactPointerPath(pointer: ArtifactPointer | undefined): string | undefined {
+  if (!pointer) return undefined;
+  if (typeof pointer === 'string') return nonEmptyString(pointer);
+  for (const key of ['path', 'artifact_path', 'relative_path'] as const) {
+    const candidate = nonEmptyString(pointer[key]);
+    if (candidate !== undefined) return candidate;
+  }
+  return undefined;
+}
+
+/** First non-blank transcript path; blank strings fall through to the next pointer shape. */
+function resolveTranscriptPath(record: ResultManifestRecord): string | undefined {
+  const { transcript_path: flatPath, artifact_pointers: wire, artifacts, transcript } = record;
+  // `??` alone would let an empty-string field mask a usable pointer further down the chain.
+  const declared = [flatPath, wire?.transcript?.path, artifacts?.transcript_path];
+  const flat = declared.map(nonEmptyString).find((candidate) => candidate !== undefined);
+  return flat ?? artifactPointerPath(transcript) ?? artifactPointerPath(artifacts?.transcript);
+}
+
 function hydrateInput(
   baseDir: string,
   record: ResultManifestRecord,
@@ -142,13 +204,19 @@ function hydrateOutput(
   return responseText.trimEnd();
 }
 
-function hydrateTrace(baseDir: string, record: ResultManifestRecord): EvaluationResult['trace'] {
-  const transcriptText = readOptionalText(baseDir, record.transcript_path);
-  if (transcriptText) {
-    try {
-      return traceFromTranscriptJsonLines(parseJsonlLines<TranscriptJsonLine>(transcriptText));
-    } catch {
-      // Fall through to a minimal trace below.
+function hydrateTrace(
+  baseDir: string,
+  record: ResultManifestRecord,
+  options: ManifestHydrationOptions,
+): EvaluationResult['trace'] {
+  if (options.hydrateTranscriptTrace !== false) {
+    const transcriptText = readOptionalText(baseDir, resolveTranscriptPath(record));
+    if (transcriptText) {
+      try {
+        return traceFromTranscriptJsonLines(parseJsonlLines<TranscriptJsonLine>(transcriptText));
+      } catch {
+        // Fall through to a minimal trace below.
+      }
     }
   }
 
@@ -163,7 +231,11 @@ function hydrateTrace(baseDir: string, record: ResultManifestRecord): Evaluation
   });
 }
 
-function hydrateManifestRecord(baseDir: string, record: ResultManifestRecord): EvaluationResult {
+function hydrateManifestRecord(
+  baseDir: string,
+  record: ResultManifestRecord,
+  options: ManifestHydrationOptions,
+): EvaluationResult {
   const grading = readOptionalJson<GradingArtifact>(baseDir, record.grading_path);
   const timing = readOptionalJson<TimingArtifact>(baseDir, record.timing_path);
   const testId = record.test_id ?? 'unknown';
@@ -218,7 +290,7 @@ function hydrateManifestRecord(baseDir: string, record: ResultManifestRecord): E
     costUsd: record.cost_usd,
     input: hydrateInput(baseDir, record),
     output: hydrateOutput(baseDir, record) ?? '',
-    trace: hydrateTrace(baseDir, record),
+    trace: hydrateTrace(baseDir, record, options),
     metadata: record.metadata,
   } as EvaluationResult;
 }
@@ -235,12 +307,15 @@ export function resolveResultSourcePath(source: string, cwd?: string): string {
   return resolved;
 }
 
-export function loadManifestResults(sourceFile: string): EvaluationResult[] {
+export function loadManifestResults(
+  sourceFile: string,
+  options: ManifestHydrationOptions = {},
+): EvaluationResult[] {
   const resolvedSourceFile = resolveRunManifestPath(sourceFile);
   const content = readFileSync(resolvedSourceFile, 'utf8');
   const records = parseResultRows(content, resolvedSourceFile);
   const baseDir = path.dirname(resolvedSourceFile);
-  return records.map((record) => hydrateManifestRecord(baseDir, record));
+  return records.map((record) => hydrateManifestRecord(baseDir, record, options));
 }
 
 export interface LightweightResultRecord {
@@ -253,6 +328,7 @@ export interface LightweightResultRecord {
   readonly scores?: readonly Record<string, unknown>[];
   readonly executionStatus?: string;
   readonly error?: string;
+  readonly costUsd?: number;
   readonly timestamp?: string;
 }
 
@@ -269,6 +345,7 @@ export function loadLightweightResults(sourceFile: string): LightweightResultRec
     scores: record.scores,
     executionStatus: record.execution_status,
     error: record.error,
+    costUsd: record.cost_usd,
     timestamp: record.timestamp,
   }));
 }
diff --git a/apps/cli/src/commands/results/projection-bundle.ts b/apps/cli/src/commands/results/projection-bundle.ts
new file mode 100644
index 000000000..d01cb12c8
--- /dev/null
+++ b/apps/cli/src/commands/results/projection-bundle.ts
@@ -0,0 +1,405 @@
+/**
+ * Vendor-neutral projection bundle for completed AgentV runs.
+ *
+ * This file builds a deterministic, local JSON contract that adapter workers
+ * can consume without calling vendor SDKs. The bundle keeps AgentV artifacts as
+ * the source of truth, includes metadata-only OpenInference-shaped spans by
+ * default, and requires explicit opt-in before raw prompt/output/tool payloads
+ * are copied into the bundle.
+ */
+
+import { createHash } from 'node:crypto';
+import { mkdir, writeFile } from 'node:fs/promises';
+import path from 'node:path';
+
+import {
+  type EvaluationResult,
+  type ExportDuplicatePolicy,
+  type IndexArtifactEntry,
+  type ProjectionIdentityWire,
+  type TraceEnvelopeCaptureWire,
+  type TraceEnvelopeConversionWarningWire,
+  type TraceEnvelopeScoreWire,
+  type TraceEnvelopeWire,
+  buildResultIndexArtifact,
+  buildTraceEnvelopeFromEvaluationResult,
+  toTraceEnvelopeWire,
+} from '@agentv/core';
+
+export const PROJECTION_BUNDLE_FILENAME = 'projection_bundle.json';
+export const PROJECTION_BUNDLE_SCHEMA_VERSION = 'agentv.projection_bundle.v1';
+
+type JsonRecord = Record<string, unknown>;
+
+export interface ProjectionBundle {
+  readonly schema_version: typeof PROJECTION_BUNDLE_SCHEMA_VERSION;
+  readonly bundle_id: string; // `projection-bundle-` + 20-hex-char SHA-256 prefix
+  readonly created_at: string; // earliest result timestamp; unparseable ones count as epoch
+  readonly source: {
+    readonly kind: 'agentv_run';
+    readonly path: string; // cwd-relative when inside cwd, else absolute; `/`-separated
+    readonly run_id: string;
+    readonly result_count: number;
+  };
+  readonly content_policy: {
+    readonly raw_content: 'excluded' | 'included';
+    readonly raw_content_opt_in: boolean;
+    readonly default_capture: 'metadata' | 'full';
+    readonly backend_anonymizer_boundary: 'adapter';
+  };
+  readonly capture_summary: TraceEnvelopeCaptureWire; // capture block of the first entry
+  readonly entries: readonly ProjectionBundleEntry[];
+  readonly conversion_warnings?: readonly TraceEnvelopeConversionWarningWire[];
+}
+
+export interface ProjectionBundleEntry {
+  readonly projection_id: string; // `id` of the envelope's projection identity
+  readonly projection_identity: ProjectionIdentityWire;
+  readonly eval: TraceEnvelopeWire['eval'];
+  readonly artifact_refs: ProjectionBundleArtifactRefs;
+  readonly trace: {
+    readonly format: TraceEnvelopeWire['trace']['format'];
+    readonly trace_id: string;
+    readonly root_span_id: string;
+    readonly span_count: number;
+    readonly envelope_ref?: string; // copy of artifact_refs.trace_path when that is set
+  };
+  readonly trace_envelope: TraceEnvelopeWire;
+  readonly feedback: {
+    readonly source: 'agentv_grading_artifacts';
+    readonly result_score: number;
+    readonly execution_status?: string;
+    readonly grading_path?: string;
+    readonly timing_path?: string;
+    readonly assertion_count: number; // length of result.assertions, 0 when absent
+    readonly scores?: readonly TraceEnvelopeScoreWire[];
+  };
+  readonly capture: TraceEnvelopeCaptureWire;
+  readonly conversion_warnings?: readonly TraceEnvelopeConversionWarningWire[];
+  readonly raw_content?: { // present only when raw content was opted into
+    readonly input?: unknown;
+    readonly output?: string;
+    readonly trace_messages?: unknown;
+  };
+}
+
+export type ProjectionBundleArtifactRefs = Partial<
+  Pick<
+    IndexArtifactEntry,
+    | 'artifact_dir'
+    | 'grading_path'
+    | 'timing_path'
+    | 'input_path'
+    | 'output_path'
+    | 'answer_path'
+    | 'response_path'
+    | 'transcript_path'
+    | 'task_dir'
+    | 'eval_path'
+    | 'targets_path'
+    | 'files_path'
+    | 'graders_path'
+  > & { readonly trace_path: string }
+> & {
+  readonly status: 'planned_export' | 'emitted'; // buildEntry defaults to 'planned_export'
+};
+
+export interface BuildProjectionBundleOptions {
+  readonly sourceFile: string;
+  readonly runId: string;
+  readonly cwd?: string; // base for the portable source path; process.cwd() when omitted
+  readonly includeRawContent?: boolean; // treated as false when omitted
+  readonly duplicatePolicy?: ExportDuplicatePolicy;
+  readonly artifactRefStatus?: ProjectionBundleArtifactRefs['status']; // default 'planned_export'
+  readonly indexRecords?: readonly IndexArtifactEntry[]; // paired with results by array index
+}
+
+/** Shallow copy of `value` without the keys whose value is `undefined` (null is kept). */
+function dropUndefined<T extends JsonRecord>(value: T): T {
+  const kept = Object.entries(value).filter((pair) => pair[1] !== undefined);
+  return Object.fromEntries(kept) as T;
+}
+
+/** Path relative to `cwd` (absolute if outside it), `/`-separated; `..name` stays relative. */
+function toPortablePath(filePath: string, cwd?: string): string {
+  const absolutePath = path.resolve(filePath);
+  const relative = path.relative(path.resolve(cwd ?? process.cwd()), absolutePath);
+  const escapesCwd = relative === '..' || relative.startsWith(`..${path.sep}`);
+  const portable = relative && !escapesCwd && !path.isAbsolute(relative) ? relative : absolutePath;
+  return portable.split(path.sep).join('/');
+}
+
+/** Parses `value` as a date; missing or unparseable input yields the Unix epoch. */
+function stableDate(value: string | undefined): Date {
+  return new Date(Date.parse(value ?? '') || 0); // NaN (and 0) both land on the epoch
+}
+
+/** Earliest result timestamp as an ISO string (missing or invalid ones count as the epoch). */
+function bundleCreatedAt(results: readonly EvaluationResult[]): string {
+  const isoStamps = results.map((result) => stableDate(result.timestamp).toISOString());
+  isoStamps.sort((left, right) => left.localeCompare(right));
+  return isoStamps[0] ?? new Date(0).toISOString();
+}
+
+/** First `length` hex characters of the SHA-256 digest over `parts` joined by newlines. */
+const shortHash = (parts: readonly string[], length = 20): string =>
+  createHash('sha256').update(parts.join('\n')).digest('hex').slice(0, length);
+
+/** `<artifact_dir>/outputs/trace.json`, or undefined when the entry has no artifact_dir. */
+function tracePathFor(indexEntry: IndexArtifactEntry): string | undefined {
+  const dir = indexEntry.artifact_dir;
+  return dir ? path.posix.join(dir, 'outputs', 'trace.json') : undefined;
+}
+
+/**
+ * Selects the artifact references exposed on a bundle entry.
+ * Without the raw-content opt-in only `status` and `timing_path` are exposed; with it, the
+ * index entry's path fields and the derived `trace_path` are added. Undefined values are
+ * dropped in both modes.
+ */
+function artifactRefs(
+  indexEntry: IndexArtifactEntry,
+  options: {
+    readonly includeRawContent: boolean;
+    readonly status: ProjectionBundleArtifactRefs['status'];
+  },
+): ProjectionBundleArtifactRefs {
+  const exposed = { status: options.status, timing_path: indexEntry.timing_path };
+  if (!options.includeRawContent) return dropUndefined(exposed);
+
+  return dropUndefined({
+    ...exposed,
+    artifact_dir: indexEntry.artifact_dir,
+    grading_path: indexEntry.grading_path,
+    input_path: indexEntry.input_path,
+    output_path: indexEntry.output_path,
+    answer_path: indexEntry.answer_path,
+    response_path: indexEntry.response_path,
+    transcript_path: indexEntry.transcript_path,
+    trace_path: tracePathFor(indexEntry),
+    task_dir: indexEntry.task_dir,
+    eval_path: indexEntry.eval_path,
+    targets_path: indexEntry.targets_path,
+    files_path: indexEntry.files_path,
+    graders_path: indexEntry.graders_path,
+  });
+}
+
+/**
+ * Returns a copy of `envelope` whose span events carry no transcript-message `metadata`.
+ *
+ * For each event, the `agentv.transcript.message` attribute is replaced by a copy without its
+ * `metadata` key. Events whose attribute is missing, not an object, or an array are returned
+ * as-is; spans without `events` end up with `events: undefined`.
+ */
+function removeTranscriptMessageMetadata(envelope: TraceEnvelopeWire): TraceEnvelopeWire {
+  const messageKey = 'agentv.transcript.message';
+
+  const spans = envelope.trace.spans.map((span) => {
+    const events = span.events?.map((event) => {
+      const message = event.attributes?.[messageKey];
+      const isRecord = !!message && typeof message === 'object' && !Array.isArray(message);
+      if (!isRecord) {
+        return event;
+      }
+
+      // Rest-destructuring keeps every key of the message except `metadata`.
+      const { metadata: _metadata, ...safeMessage } = message as JsonRecord;
+      const attributes = { ...event.attributes, [messageKey]: safeMessage };
+      return { ...event, attributes };
+    });
+    return { ...span, events };
+  });
+
+  const trace = { ...envelope.trace, spans };
+  return { ...envelope, trace };
+}
+
+/**
+ * Strips raw evidence (source metadata, artifacts, score evidence, transcript-message
+ * metadata) from `envelope` unless raw content was opted into; JSON round-trip prunes undefined.
+ */
+function safeEnvelope(
+  envelope: TraceEnvelopeWire,
+  options: { includeRawContent: boolean },
+): TraceEnvelopeWire {
+  if (options.includeRawContent) return envelope;
+
+  const source = { ...envelope.source, metadata: undefined };
+  const scores = envelope.scores?.map(({ evidence: _evidence, ...rest }) => rest);
+  const redacted = removeTranscriptMessageMetadata({
+    ...envelope,
+    source,
+    artifacts: undefined,
+    scores,
+  });
+  return JSON.parse(JSON.stringify(redacted)) as TraceEnvelopeWire;
+}
+
+/** Returns `scores` as given when raw content is opted into, else copies without `evidence`. */
+function safeScores(
+  scores: readonly TraceEnvelopeScoreWire[] | undefined,
+  options: { includeRawContent: boolean },
+): readonly TraceEnvelopeScoreWire[] | undefined {
+  if (!scores || options.includeRawContent) return scores;
+  return scores.map((score) => {
+    const { evidence: _evidence, ...withoutEvidence } = score;
+    return withoutEvidence;
+  });
+}
+
+/** Full-content, unredacted capture settings when raw content is opted into; else undefined. */
+function captureOptions(includeRawContent: boolean) {
+  if (!includeRawContent) return undefined;
+  return { content: 'full' as const, redactionLevel: 'none' as const, redactedFields: [] };
+}
+
+/**
+ * Raw input, output and trace messages of `result`; members that are undefined are omitted.
+ */
+function rawContent(result: EvaluationResult): ProjectionBundleEntry['raw_content'] {
+  const { input, output, trace } = result;
+  return dropUndefined({ input, output, trace_messages: trace.messages });
+}
+
+function buildEntry(
+  result: EvaluationResult,
+  options: BuildProjectionBundleOptions,
+  indexRecord?: IndexArtifactEntry,
+): ProjectionBundleEntry {
+  const includeRawContent = options.includeRawContent ?? false; // metadata-only by default
+  const sourcePath = toPortablePath(options.sourceFile, options.cwd);
+  const plannedIndexEntry = buildResultIndexArtifact(result); // used only for the trace path
+  const envelope = buildTraceEnvelopeFromEvaluationResult(result, {
+    evalPath: sourcePath,
+    runId: options.runId,
+    source: { kind: 'agentv_run', path: sourcePath, format: 'agentv_result' },
+    artifacts: {
+      trace_path: tracePathFor(indexRecord ?? plannedIndexEntry),
+      answer_path: result.output.length > 0 ? 'outputs/answer.md' : undefined,
+      response_path: result.output.length > 0 ? 'outputs/response.md' : undefined,
+    },
+    duplicatePolicy: options.duplicatePolicy,
+    capture: captureOptions(includeRawContent),
+    now: () => stableDate(result.timestamp), // clock pinned to the result's own timestamp
+  });
+  const projectionIdentity = envelope.projectionIdentity;
+  if (!projectionIdentity) {
+    throw new Error(`Result ${result.testId ?? 'unknown'} is missing projection identity`);
+  }
+
+  const indexEntry = // an index record passed by the caller takes precedence
+    indexRecord ??
+    buildResultIndexArtifact(result, undefined, {
+      projectionIdentity,
+      duplicatePolicy: options.duplicatePolicy,
+    });
+  const refs = artifactRefs(indexEntry, {
+    includeRawContent,
+    status: options.artifactRefStatus ?? 'planned_export',
+  });
+  const safeEnvelopeWire = safeEnvelope(toTraceEnvelopeWire(envelope), { includeRawContent });
+  const projectionIdentityWire = // the index entry's identity wins over the envelope's
+    indexEntry.projection_identity ?? safeEnvelopeWire.projection_identity;
+  if (!projectionIdentityWire) {
+    throw new Error(`Result ${result.testId ?? 'unknown'} is missing projection identity`);
+  }
+  const envelopeWire = {
+    ...safeEnvelopeWire,
+    projection_identity: projectionIdentityWire,
+  };
+  const scores = safeScores(envelopeWire.scores, { includeRawContent });
+
+  const feedback: ProjectionBundleEntry['feedback'] = dropUndefined({
+    source: 'agentv_grading_artifacts',
+    result_score: result.score,
+    execution_status: result.executionStatus,
+    grading_path: refs.grading_path,
+    timing_path: refs.timing_path,
+    assertion_count: result.assertions?.length ?? 0,
+    scores,
+  });
+
+  return {
+    projection_id: projectionIdentity.id, // taken from the envelope, not from the index entry
+    projection_identity: projectionIdentityWire,
+    eval: envelopeWire.eval,
+    artifact_refs: refs,
+    trace: dropUndefined({
+      format: envelopeWire.trace.format,
+      trace_id: envelopeWire.trace.trace_id,
+      root_span_id: envelopeWire.trace.root_span_id,
+      span_count: envelopeWire.trace.spans.length,
+      envelope_ref: refs.trace_path,
+    }),
+    trace_envelope: envelopeWire,
+    feedback,
+    capture: envelopeWire.capture,
+    ...(envelopeWire.conversion_warnings
+      ? { conversion_warnings: envelopeWire.conversion_warnings }
+      : {}),
+    ...(includeRawContent ? { raw_content: rawContent(result) } : {}), // opt-in only
+  };
+}
+
+/**
+ * Assembles the projection bundle for a run.
+ *
+ * `options.indexRecords`, when given, are matched to `results` by array position. The bundle
+ * id hashes the schema version, portable source path, run id, artifact-ref status, content
+ * mode and every entry's projection id; `created_at` is the earliest result timestamp.
+ * @throws Error when `results` is empty.
+ */
+export function buildProjectionBundle(
+  results: readonly EvaluationResult[],
+  options: BuildProjectionBundleOptions,
+): ProjectionBundle {
+  if (results.length === 0) {
+    throw new Error(`No results found in ${options.sourceFile}`);
+  }
+
+  const { indexRecords, runId } = options;
+  const entries = results.map((result, position) =>
+    buildEntry(result, options, indexRecords?.[position]),
+  );
+  const includeRawContent = options.includeRawContent ?? false;
+  const status = options.artifactRefStatus ?? 'planned_export';
+  const sourcePath = toPortablePath(options.sourceFile, options.cwd);
+  const contentMode = includeRawContent ? 'raw' : 'metadata';
+  const projectionIds = entries.map((entry) => entry.projection_id);
+  const idParts = [PROJECTION_BUNDLE_SCHEMA_VERSION, sourcePath, runId, status, contentMode];
+  const conversionWarnings = entries.flatMap((entry) => entry.conversion_warnings ?? []);
+
+  return {
+    schema_version: PROJECTION_BUNDLE_SCHEMA_VERSION,
+    bundle_id: `projection-bundle-${shortHash([...idParts, ...projectionIds])}`,
+    created_at: bundleCreatedAt(results),
+    source: { kind: 'agentv_run', path: sourcePath, run_id: runId, result_count: results.length },
+    content_policy: {
+      raw_content: includeRawContent ? 'included' : 'excluded',
+      raw_content_opt_in: includeRawContent,
+      default_capture: includeRawContent ? 'full' : 'metadata',
+      backend_anonymizer_boundary: 'adapter',
+    },
+    capture_summary: entries[0]?.capture ?? {
+      content: includeRawContent ? 'full' : 'metadata',
+      redaction_level: includeRawContent ? 'none' : 'partial',
+    },
+    entries,
+    ...(conversionWarnings.length > 0 ? { conversion_warnings: conversionWarnings } : {}),
+  };
+}
+
+/** Pretty-prints `bundle` as 2-space-indented JSON terminated by a single newline. */
+export const serializeProjectionBundle = (bundle: ProjectionBundle): string =>
+  `${JSON.stringify(bundle, null, 2)}\n`;
+
+export async function writeProjectionBundle(
+  bundle: ProjectionBundle,
+  outputDir: string,
+): Promise<string> {
+  const bundlePath = path.join(outputDir, PROJECTION_BUNDLE_FILENAME);
+  await mkdir(outputDir, { recursive: true }); // also creates missing parent directories
+  await writeFile(bundlePath, serializeProjectionBundle(bundle), 'utf8');
+  return bundlePath; // `outputDir` joined with PROJECTION_BUNDLE_FILENAME
+}
diff --git a/apps/cli/src/commands/results/remote-metadata.ts b/apps/cli/src/commands/results/remote-metadata.ts
index 00f57de7b..21513efae 100644
--- a/apps/cli/src/commands/results/remote-metadata.ts
+++ b/apps/cli/src/commands/results/remote-metadata.ts
@@ -17,6 +17,13 @@ import { execFileSync } from 'node:child_process';
 import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
 import path from 'node:path';
 
+import {
+  type RunOplogWatermark,
+  buildRunIdFromRelativePath,
+  createRunTagsSetOperation,
+  normalizeRunOplogWatermark,
+  watermarkFromRunOperation,
+} from './run-oplog.js';
 import { RUN_TAGS_FILENAME, normalizeTags } from './run-tags.js';
 
 const RESULTS_RUNS_DIR = 'runs';
@@ -25,6 +32,7 @@ const REMOTE_METADATA_RUNS_DIR = path.join('metadata', 'runs');
 interface TagsFile {
   readonly tags: string[];
   readonly updatedAt?: string;
+  readonly oplogWatermark?: RunOplogWatermark;
 }
 
 interface RemoteRunMetadataPaths {
@@ -48,6 +56,7 @@ export interface RemoteRunTagState {
   readonly pendingTags?: string[];
   readonly dirty: boolean;
   readonly updatedAt?: string;
+  readonly oplogWatermark: RunOplogWatermark;
   readonly metadataPath: string;
 }
 
@@ -112,9 +121,11 @@ function parseTagsFile(content: string): TagsFile | undefined {
   const record = parsed as Record<string, unknown>;
   if (!Array.isArray(record.tags)) return undefined;
   const tags = record.tags.filter((tag): tag is string => typeof tag === 'string');
+  const updatedAt = typeof record.updated_at === 'string' ? record.updated_at : undefined;
   return {
     tags,
-    updatedAt: typeof record.updated_at === 'string' ? record.updated_at : undefined,
+    updatedAt,
+    oplogWatermark: normalizeRunOplogWatermark(record.oplog_watermark, updatedAt),
   };
 }
 
@@ -123,6 +134,26 @@ function equalTags(a: readonly string[], b: readonly string[]): boolean {
   return a.every((tag, index) => tag === b[index]);
 }
 
+function equalWatermarks(
+  a: RunOplogWatermark | undefined,
+  b: RunOplogWatermark | undefined,
+): boolean {
+  // Field-by-field comparison; a missing watermark reads as all-undefined fields.
+  const fields = ['ref', 'operation_id', 'updated_at'] as const;
+  return fields.every((field) => a?.[field] === b?.[field]);
+}
+
+/**
+ * Compares two parsed tag files: equal when both are absent, or when both are
+ * present with identical tags, updatedAt and oplog watermark.
+ */
+function equalTagFiles(a: TagsFile | undefined, b: TagsFile | undefined): boolean {
+  if (a === undefined || b === undefined) return a === b; // absent only equals absent
+  if (!equalTags(a.tags, b.tags)) return false;
+  if (a.updatedAt !== b.updatedAt) return false;
+  return equalWatermarks(a.oplogWatermark, b.oplogWatermark);
+}
+
 function resolveComparisonRef(repoDir: string): string | undefined {
   const upstream = tryRunGit(repoDir, [
     'rev-parse',
@@ -188,7 +219,14 @@ function readRemoteRunTagsContext(repoDir: string, manifestPath: string): Remote
 function toRemoteRunTagState(context: RemoteRunTagsContext): RemoteRunTagState {
   const remoteTags = context.baseOverlayTags?.tags ?? context.artifactTags?.tags ?? [];
   const effectiveTags = context.localOverlayTags?.tags ?? remoteTags;
-  const dirty = !equalTags(effectiveTags, remoteTags);
+  const dirty = context.localOverlayTags
+    ? !equalTagFiles(context.localOverlayTags, context.baseOverlayTags)
+    : !equalTags(effectiveTags, remoteTags);
+  const watermark =
+    context.localOverlayTags?.oplogWatermark ??
+    context.baseOverlayTags?.oplogWatermark ??
+    context.artifactTags?.oplogWatermark ??
+    normalizeRunOplogWatermark(undefined);
 
   return {
     tags: effectiveTags,
@@ -199,6 +237,7 @@ function toRemoteRunTagState(context: RemoteRunTagsContext): RemoteRunTagState {
       context.localOverlayTags?.updatedAt ??
       context.baseOverlayTags?.updatedAt ??
       context.artifactTags?.updatedAt,
+    oplogWatermark: watermark,
     metadataPath: context.paths.overlayTagsPath,
   };
 }
@@ -235,14 +274,25 @@ export function writeRemoteRunTags(
   const context = readRemoteRunTagsContext(repoDir, manifestPath);
   const remoteTags = context.baseOverlayTags?.tags ?? context.artifactTags?.tags ?? [];
 
-  if (equalTags(cleaned, remoteTags) && context.baseOverlayTags === undefined) {
+  if (
+    cleaned.length > 0 &&
+    equalTags(cleaned, remoteTags) &&
+    context.baseOverlayTags === undefined
+  ) {
     rmSync(context.paths.overlayTagsPath, { force: true });
     return readRemoteRunTags(repoDir, manifestPath);
   }
 
+  const operation = createRunTagsSetOperation({
+    runId: buildRunIdFromRelativePath(context.paths.runRelativePath),
+    runPath: context.paths.runRelativePath,
+    tags: cleaned,
+    actor: { kind: 'dashboard' },
+  });
   const entry = {
     tags: cleaned,
-    updated_at: new Date().toISOString(),
+    updated_at: operation.authored_at,
+    oplog_watermark: watermarkFromRunOperation(operation),
   };
   mkdirSync(path.dirname(context.paths.overlayTagsPath), { recursive: true });
   writeFileSync(context.paths.overlayTagsPath, `${JSON.stringify(entry, null, 2)}\n`, 'utf8');
diff --git a/apps/cli/src/commands/results/run-oplog.ts b/apps/cli/src/commands/results/run-oplog.ts
new file mode 100644
index 000000000..c307e5841
--- /dev/null
+++ b/apps/cli/src/commands/results/run-oplog.ts
@@ -0,0 +1,141 @@
+import { randomUUID } from 'node:crypto';
+
+/**
+ * Minimal run operation-log contract used by Dashboard read models.
+ *
+ * The raw oplog storage branch is intentionally not implemented here. This
+ * module only centralizes the ref name, a small typed operation envelope for
+ * tag replacement, and the materialized final-state shape that readers consume.
+ */
+
+export const RUN_OPLOG_REF = 'agentv/oplog/v1'; // ref stamped on every watermark built here
+export const RUN_OPERATION_SCHEMA_VERSION = 'agentv.run_operation.v1'; // envelope schema tag
+
+export type RunFinalStateLifecycle = 'active' | 'hidden' | 'deleted';
+
+export interface RunOplogWatermark {
+  readonly ref: typeof RUN_OPLOG_REF;
+  readonly operation_id?: string; // left out when no non-empty string id is available
+  readonly updated_at?: string; // left out when neither the input nor a fallback provides one
+}
+
+export interface RunFinalState {
+  readonly lifecycle: RunFinalStateLifecycle; // materializeRunState falls back to 'active'
+  readonly tags: string[];
+}
+
+export interface RunReadStateFields {
+  readonly final_state: RunFinalState;
+  readonly oplog_watermark: RunOplogWatermark;
+}
+
+export type RunOperationActorKind = 'dashboard' | 'cli' | 'ci' | 'agent' | 'unknown';
+
+export interface RunOperationActor {
+  readonly kind: RunOperationActorKind;
+  readonly id?: string;
+}
+
+export interface RunOperationSubject {
+  readonly run_id: string;
+  readonly run_path?: string; // only set when a non-empty run path was supplied
+}
+
+export interface RunTagsSetOperation {
+  readonly schema_version: typeof RUN_OPERATION_SCHEMA_VERSION;
+  readonly operation_id: string; // random UUID unless the caller supplies one
+  readonly operation_type: 'run.tags.set';
+  readonly authored_at: string; // defaults to the current time as an ISO string
+  readonly actor: RunOperationActor; // createRunTagsSetOperation defaults to { kind: 'unknown' }
+  readonly subject: RunOperationSubject;
+  readonly payload: {
+    readonly tags: string[];
+  };
+}
+
+export type RunOperationEnvelope = RunTagsSetOperation; // tag replacement is the only operation so far
+
+export function buildRunIdFromRelativePath(relativeRunPath: string): string {
+  // Tolerate POSIX and Windows separators alike; empty segments are dropped.
+  const parts = relativeRunPath.split(/[\\/]+/).filter(Boolean);
+  if (parts.length < 2) return parts[0] ?? relativeRunPath;
+  const runName = parts[parts.length - 1] ?? relativeRunPath;
+  const experiment = parts.slice(0, -1).join('/');
+  // Runs under the "default" experiment keep their bare name.
+  return experiment === 'default' ? runName : `${experiment}::${runName}`;
+}
+
+/** Builds a `run.tags.set` envelope; id, timestamp and actor are defaulted when omitted. */
+export function createRunTagsSetOperation(input: {
+  readonly runId: string;
+  readonly runPath?: string;
+  readonly tags: readonly string[];
+  readonly actor?: RunOperationActor;
+  readonly authoredAt?: string;
+  readonly operationId?: string;
+}): RunTagsSetOperation {
+  const { runId, runPath, tags, actor, authoredAt, operationId } = input;
+  // Leave run_path out of the subject entirely when no (non-empty) path was given.
+  const subject = runPath ? { run_id: runId, run_path: runPath } : { run_id: runId };
+
+  return {
+    schema_version: RUN_OPERATION_SCHEMA_VERSION,
+    operation_id: operationId ?? randomUUID(),
+    operation_type: 'run.tags.set',
+    authored_at: authoredAt ?? new Date().toISOString(),
+    actor: actor ?? { kind: 'unknown' },
+    subject,
+    payload: { tags: [...tags] }, // copy so the envelope does not alias the caller's array
+  };
+}
+
+/**
+ * Watermark pointing at `operation`; its authoring time becomes `updated_at`.
+ */
+export function watermarkFromRunOperation(operation: RunOperationEnvelope): RunOplogWatermark {
+  const { operation_id, authored_at: updated_at } = operation;
+  return { ref: RUN_OPLOG_REF, operation_id, updated_at };
+}
+
+/**
+ * Coerces an untrusted `oplog_watermark` value into a well-formed watermark.
+ *
+ * Only non-empty string fields are kept. When the input has no usable
+ * `updated_at`, a non-empty `fallbackUpdatedAt` is used in its place.
+ */
+export function normalizeRunOplogWatermark(
+  input: unknown,
+  fallbackUpdatedAt?: string,
+): RunOplogWatermark {
+  const keepString = (value: unknown): string | undefined =>
+    typeof value === 'string' && value ? value : undefined;
+  // Anything that is not an object contributes no fields of its own.
+  const record: Record<string, unknown> =
+    input && typeof input === 'object' ? (input as Record<string, unknown>) : {};
+  const operationId = keepString(record.operation_id);
+  const updatedAt = keepString(record.updated_at) ?? fallbackUpdatedAt;
+
+  return {
+    ref: RUN_OPLOG_REF,
+    ...(operationId ? { operation_id: operationId } : {}),
+    ...(updatedAt ? { updated_at: updatedAt } : {}),
+  };
+}
+
+export function materializeRunState(input?: {
+  readonly lifecycle?: RunFinalStateLifecycle;
+  readonly tags?: readonly string[];
+  readonly watermark?: RunOplogWatermark;
+  readonly updatedAt?: string;
+}): RunReadStateFields {
+  // Defaults: an active run with no tags. Tags are copied, never aliased.
+  const finalState: RunFinalState = {
+    lifecycle: input?.lifecycle ?? 'active',
+    tags: Array.from(input?.tags ?? []),
+  };
+  // An explicit watermark wins; otherwise one is synthesized from updatedAt alone.
+  const watermark =
+    input?.watermark ?? normalizeRunOplogWatermark(undefined, input?.updatedAt);
+
+  return { final_state: finalState, oplog_watermark: watermark };
+}
diff --git a/apps/cli/src/commands/results/run-tags.ts b/apps/cli/src/commands/results/run-tags.ts
index 3714f73d7..9464aa068 100644
--- a/apps/cli/src/commands/results/run-tags.ts
+++ b/apps/cli/src/commands/results/run-tags.ts
@@ -7,20 +7,24 @@
  *
  * Wire format (stored on disk):
  * ```json
- * { "tags": ["baseline", "v2-prompt"], "updated_at": "2026-04-10T00:00:00.000Z" }
+ * {
+ *   "tags": ["baseline", "v2-prompt"],
+ *   "updated_at": "2026-04-10T00:00:00.000Z",
+ *   "oplog_watermark": { "ref": "agentv/oplog/v1" }
+ * }
  * ```
  *
  * Used by the Dashboard compare API so users can retroactively tag runs
- * without changing the eval YAML or the run manifest itself. This mirrors
- * the Langfuse / W&B / GitHub `tags` pattern — a mutable multi-valued
- * list of free-form labels that lives alongside the immutable run_id.
+ * without changing the eval YAML or the run manifest itself. Tags are a
+ * mutable multi-valued list of free-form labels that lives alongside the
+ * immutable run_id.
  *
  * Validation rules:
  *   - Each tag is 1–60 characters after trimming
  *   - No control characters (\n, \t, DEL, etc.)
  *   - Tags are deduplicated case-sensitively
  *   - A run can have at most 20 tags
- *   - Writing an empty array removes the sidecar file
+ *   - Writing an empty array records a clear/tombstone state with a watermark
  *
  * To extend (e.g. add colored labels or descriptions): add optional fields
  * to `RunTagsFile` and keep the schema additive so older files still parse.
@@ -29,6 +33,14 @@
 import { existsSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
 import path from 'node:path';
 
+import {
+  type RunOplogWatermark,
+  buildRunIdFromRelativePath,
+  createRunTagsSetOperation,
+  normalizeRunOplogWatermark,
+  watermarkFromRunOperation,
+} from './run-oplog.js';
+
 export const RUN_TAGS_FILENAME = 'tags.json';
 
 /** Maximum number of tags per run. */
@@ -42,6 +54,8 @@ export interface RunTagsFile {
   tags: string[];
   /** ISO-8601 timestamp of last update. */
   updated_at: string;
+  /** Watermark for the operation-log state this materialized tag list reflects. */
+  oplog_watermark?: RunOplogWatermark;
 }
 
 /** Resolve the tags sidecar path given a run manifest (index.jsonl) path. */
@@ -49,6 +63,16 @@ export function runTagsPath(manifestPath: string): string {
   return path.join(path.dirname(manifestPath), RUN_TAGS_FILENAME);
 }
 
+/** Run directory path relative to the last `runs` segment ('/'-joined), else its basename. */
+function inferRunRelativePath(manifestPath: string): string {
+  const runDir = path.dirname(manifestPath);
+  // Normalize first so doubled or mixed separators still split into clean segments.
+  const segments = path.normalize(runDir).split(path.sep);
+  const runsIndex = segments.lastIndexOf('runs');
+  const nested = runsIndex >= 0 && runsIndex < segments.length - 1;
+  return nested ? segments.slice(runsIndex + 1).join('/') : path.basename(runDir);
+}
+
 /** Read the tags for a run. Returns `undefined` if missing or unreadable. */
 export function readRunTags(manifestPath: string): RunTagsFile | undefined {
   const fp = runTagsPath(manifestPath);
@@ -61,10 +85,11 @@ export function readRunTags(manifestPath: string): RunTagsFile | undefined {
     const tags = record.tags.filter(
       (t): t is string => typeof t === 'string' && t.trim().length > 0,
     );
-    if (tags.length === 0) return undefined;
+    const updatedAt = typeof record.updated_at === 'string' ? record.updated_at : '';
     return {
       tags,
-      updated_at: typeof record.updated_at === 'string' ? record.updated_at : '',
+      updated_at: updatedAt,
+      oplog_watermark: normalizeRunOplogWatermark(record.oplog_watermark, updatedAt || undefined),
     };
   } catch {
     return undefined;
@@ -72,18 +97,22 @@ export function readRunTags(manifestPath: string): RunTagsFile | undefined {
 }
 
 /**
- * Write tags for a run. Replaces any existing tags. Pass an empty array
- * to remove the sidecar entirely.
+ * Write tags for a run. Replaces any existing tags. Pass an empty array to
+ * record that tags were intentionally cleared while preserving the watermark.
  */
-export function writeRunTags(manifestPath: string, tags: readonly string[]): RunTagsFile | null {
+export function writeRunTags(manifestPath: string, tags: readonly string[]): RunTagsFile {
   const cleaned = normalizeTags(tags);
-  if (cleaned.length === 0) {
-    deleteRunTags(manifestPath);
-    return null;
-  }
+  const runPath = inferRunRelativePath(manifestPath);
+  const operation = createRunTagsSetOperation({
+    runId: buildRunIdFromRelativePath(runPath),
+    runPath,
+    tags: cleaned,
+    actor: { kind: 'dashboard' },
+  });
   const entry: RunTagsFile = {
     tags: cleaned,
-    updated_at: new Date().toISOString(),
+    updated_at: operation.authored_at,
+    oplog_watermark: watermarkFromRunOperation(operation),
   };
   writeFileSync(runTagsPath(manifestPath), `${JSON.stringify(entry, null, 2)}\n`, 'utf8');
   return entry;
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
index 839fc68a5..a846f0beb 100644
--- a/apps/cli/src/commands/results/serve.ts
+++ b/apps/cli/src/commands/results/serve.ts
@@ -34,7 +34,15 @@
  *   - createApp(results, cwd) — Hono app factory
  */
 
-import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  readdirSync,
+  realpathSync,
+  statSync,
+  writeFileSync,
+} from 'node:fs';
 import { homedir } from 'node:os';
 import path from 'node:path';
 import { fileURLToPath } from 'node:url';
@@ -67,7 +75,12 @@ import {
 } from './combine-run.js';
 import { deleteLocalRun } from './delete-run.js';
 import { getActiveRunStatus, getActiveRunTarget, registerEvalRoutes } from './eval-runner.js';
-import { loadLightweightResults, loadManifestResults, parseResultManifest } from './manifest.js';
+import {
+  type ResultManifestRecord,
+  loadLightweightResults,
+  loadManifestResults,
+  parseResultManifest,
+} from './manifest.js';
 import {
   type SourcedResultFileMeta,
   clearRemoteRunTags,
@@ -79,7 +92,13 @@ import {
   setRemoteRunTags,
   syncRemoteResults,
 } from './remote.js';
-import { deleteRunTags, readRunTags, writeRunTags } from './run-tags.js';
+import {
+  type RunFinalState,
+  type RunOplogWatermark,
+  type RunReadStateFields,
+  materializeRunState,
+} from './run-oplog.js';
+import { readRunTags, writeRunTags } from './run-tags.js';
 import { type StudioConfig, loadStudioConfig, saveStudioConfig } from './studio-config.js';
 
 // ── Source resolution ────────────────────────────────────────────────────
@@ -285,6 +304,152 @@ function contentDispositionFilename(filePath: string): string {
   return path.basename(filePath).replace(/["\\\r\n]/g, '_');
 }
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === 'object' && !Array.isArray(value); // non-array object
+}
+
+function nonEmptyString(value: unknown): string | undefined {
+  return (typeof value === 'string' && value.trim()) || undefined; // trimmed text, never blank
+}
+
+function artifactPointerPath(pointer: unknown): string | undefined {
+  if (typeof pointer === 'string') return nonEmptyString(pointer);
+  if (!isRecord(pointer)) return undefined;
+  for (const key of ['path', 'artifact_path', 'relative_path']) {
+    const candidate = nonEmptyString(pointer[key]); // first non-blank candidate wins
+    if (candidate !== undefined) return candidate;
+  }
+  return undefined;
+}
+
+/** Summarizes a pointer as `ref …, storage …, uri …, path …`; undefined when nothing is set. */
+function artifactPointerDescription(pointer: unknown): string | undefined {
+  // Bare-string pointers are trimmed like every object field, so a blank
+  // string can never surface in a user-facing message.
+  if (typeof pointer === 'string') return nonEmptyString(pointer);
+  if (!isRecord(pointer)) return undefined;
+  const labelled: Array<[label: string, value: string | undefined]> = [
+    ['ref', nonEmptyString(pointer.ref)],
+    ['storage', nonEmptyString(pointer.storage)],
+    ['uri', nonEmptyString(pointer.uri) ?? nonEmptyString(pointer.href)],
+    ['path', artifactPointerPath(pointer)],
+  ];
+  const parts = labelled.flatMap(([label, value]) => (value ? [`${label} ${value}`] : []));
+  return parts.length > 0 ? parts.join(', ') : undefined;
+}
+
+function artifactPointerRef(pointer: unknown): string | undefined {
+  return (isRecord(pointer) && nonEmptyString(pointer.ref)) || undefined; // trimmed ref, if any
+}
+
+interface ResolvedArtifactPointer {
+  readonly path?: string; // path taken from the record or from its pointer, when present
+  readonly description?: string; // the direct path itself, or a text summary of the pointer
+  readonly ref?: string; // the pointer's trimmed `ref` field, when present
+  readonly unsupportedReason?: string; // set when a pointer exists but names no local path
+}
+
+/**
+ * Locates the transcript or answer artifact for a manifest record: a direct
+ * path field wins, then a structured pointer; `{}` when neither is present.
+ */
+function resolveRecordArtifactPointer(
+  record: ResultManifestRecord,
+  kind: 'transcript' | 'answer',
+): ResolvedArtifactPointer {
+  const wantsTranscript = kind === 'transcript';
+
+  // 1. A plain path on the record (or in its artifacts map) is used as-is.
+  const directPath = wantsTranscript
+    ? (record.transcript_path ?? record.artifacts?.transcript_path)
+    : (record.answer_path ?? record.artifacts?.answer_path ?? record.output_path);
+  if (directPath) return { path: directPath, description: directPath };
+
+  // 2. Otherwise look for a structured pointer.
+  const pointer = wantsTranscript
+    ? (record.transcript ?? record.artifacts?.transcript ?? record.artifact_pointers?.transcript)
+    : record.artifacts?.answer;
+  if (!pointer) return {};
+
+  const description = artifactPointerDescription(pointer);
+  const ref = artifactPointerRef(pointer);
+  const pointerPath = artifactPointerPath(pointer);
+  if (pointerPath) return { path: pointerPath, description, ref };
+
+  // 3. The pointer exists but names no local path, so report why it cannot be used.
+  const unsupportedReason = description
+    ? `${kind} artifact pointer does not include a local path (${description}).`
+    : `${kind} artifact pointer does not include a local path.`;
+  return { description, ref, unsupportedReason };
+}
+
+function resolveRunArtifactPath(
+  baseDir: string,
+  relativePath: string,
+): { absolutePath?: string; error?: string } {
+  const root = path.resolve(baseDir);
+  const candidate = path.resolve(baseDir, relativePath);
+  // Containment is judged on the resolved path strings only.
+  return isPathInsideDirectory(root, candidate)
+    ? { absolutePath: candidate }
+    : { error: 'Artifact path is outside the run workspace.' };
+}
+
+/** True when candidatePath is baseDir itself or, by path string alone, nested beneath it. */
+function isPathInsideDirectory(baseDir: string, candidatePath: string): boolean {
+  const relative = path.relative(baseDir, candidatePath);
+  // Escapes are exactly '..' or '..<sep>…'; a name such as '..cache' is a legitimate child.
+  const escapes = relative === '..' || relative.startsWith(`..${path.sep}`);
+  return !escapes && !path.isAbsolute(relative);
+}
+
+/**
+ * Resolves `relativePath` under `baseDir` to a real, regular file.
+ *
+ * Returns `{ error }` when the path falls outside the workspace, either as
+ * written or once symlinks are resolved; `{}` when no regular file can be
+ * found at that location; and `{ absolutePath }` (the symlink-resolved path)
+ * otherwise.
+ */
+function resolveReadableRunArtifactFile(
+  baseDir: string,
+  relativePath: string,
+): { absolutePath?: string; error?: string } {
+  const lexical = resolveRunArtifactPath(baseDir, relativePath);
+  if (!lexical.absolutePath) return { error: lexical.error };
+
+  try {
+    // Compare symlink-resolved paths so a link cannot point at an outside file.
+    const realBase = realpathSync(baseDir);
+    const realArtifact = realpathSync(lexical.absolutePath);
+    if (!isPathInsideDirectory(realBase, realArtifact)) {
+      return { error: 'Artifact path is outside the run workspace.' };
+    }
+
+    // Directories and other non-file entries count as "not found".
+    return statSync(realArtifact).isFile() ? { absolutePath: realArtifact } : {};
+  } catch {
+    // Missing entries and unreadable paths are reported as "not found" too.
+    return {};
+  }
+}
+
+function readOptionalRunArtifactText(
+  baseDir: string,
+  artifact: ResolvedArtifactPointer,
+): string | undefined {
+  // Best effort: no path, or no readable in-workspace file behind it, yields undefined.
+  const target = artifact.path;
+  const file = target ? resolveReadableRunArtifactFile(baseDir, target) : undefined;
+  return file?.absolutePath ? readFileSync(file.absolutePath, 'utf8') : undefined;
+}
+
+function missingTranscriptMessage(): string {
+  // Two fixed sentences, separated by a single space.
+  const lead = 'This result does not include canonical outputs/transcript.jsonl metadata.';
+  const note = 'Dashboard does not parse response.md or markdown transcripts for this view.';
+  return `${lead} ${note}`;
+}
+
 function stripHeavyFields(results: readonly EvaluationResult[]) {
   return results.map((r) => {
     const { requests, trace, ...rest } = r as EvaluationResult & Record<string, unknown>;
@@ -316,6 +481,8 @@ interface RunTagFields {
   readonly remote_tags?: string[];
   readonly pending_tags?: string[];
   readonly metadata_dirty?: boolean;
+  readonly final_state: RunFinalState;
+  readonly oplog_watermark: RunOplogWatermark;
 }
 
 // biome-ignore lint/suspicious/noExplicitAny: Hono Context generic varies by route
@@ -340,7 +507,15 @@ async function readRunTagFields(
 ): Promise<RunTagFields> {
   if (meta.source === 'local') {
     const tagsEntry = readRunTags(meta.path);
-    return tagsEntry ? { tags: tagsEntry.tags } : {};
+    const runState = materializeRunState({
+      tags: tagsEntry?.tags ?? [],
+      watermark: tagsEntry?.oplog_watermark,
+      updatedAt: tagsEntry?.updated_at || undefined,
+    });
+    return {
+      ...(tagsEntry ? { tags: tagsEntry.tags } : {}),
+      ...runState,
+    };
   }
 
   const state = await readRemoteRunTagState(searchDir, meta, projectId);
@@ -349,6 +524,7 @@ async function readRunTagFields(
       tags: [],
       remote_tags: [],
       metadata_dirty: false,
+      ...materializeRunState({ tags: [] }),
     };
   }
 
@@ -357,6 +533,11 @@ async function readRunTagFields(
     remote_tags: state.remoteTags,
     metadata_dirty: state.dirty,
     ...(state.dirty && { pending_tags: state.pendingTags ?? state.tags }),
+    ...materializeRunState({
+      tags: state.tags,
+      watermark: state.oplogWatermark,
+      updatedAt: state.updatedAt,
+    }),
   };
 }
 
@@ -366,16 +547,34 @@ function remoteTagMutationResponse(state: {
   readonly pendingTags?: string[];
   readonly dirty: boolean;
   readonly updatedAt?: string;
+  readonly oplogWatermark: RunOplogWatermark;
 }) {
   return {
     tags: state.tags,
     remote_tags: state.remoteTags,
     metadata_dirty: state.dirty,
     ...(state.dirty && { pending_tags: state.pendingTags ?? state.tags }),
+    ...materializeRunState({
+      tags: state.tags,
+      watermark: state.oplogWatermark,
+      updatedAt: state.updatedAt,
+    }),
     updated_at: state.updatedAt ?? new Date().toISOString(),
   };
 }
 
+/** Read-state fields (`final_state`, `oplog_watermark`) returned after a local tag write. */
+function localTagMutationResponse(input: {
+  readonly tags: readonly string[];
+  readonly updatedAt?: string;
+  readonly watermark?: RunOplogWatermark;
+}): RunReadStateFields {
+  const { tags, watermark, updatedAt } = input;
+  // Only these three fields are forwarded; lifecycle is left to materializeRunState.
+  const state = materializeRunState({ tags, watermark, updatedAt });
+  return state;
+}
+
 function remoteMetadataErrorStatus(error: unknown): 400 | 409 {
   const message = error instanceof Error ? error.message : String(error);
   if (
@@ -402,7 +601,7 @@ async function loadManifestResultsForMeta(
   projectId?: string,
 ): Promise<EvaluationResult[]> {
   await ensureRunReadable(searchDir, meta, projectId);
-  return loadManifestResults(meta.path);
+  return loadManifestResults(meta.path, { hydrateTranscriptTrace: false });
 }
 
 async function loadLightweightResultsForMeta(
@@ -824,6 +1023,8 @@ async function handleEvalFiles(c: C, { searchDir, projectId }: DataContext) {
     if (!record) return c.json({ error: 'Eval not found' }, 404);
 
     const baseDir = path.dirname(meta.path);
+    const transcriptArtifact = resolveRecordArtifactPointer(record, 'transcript');
+    const answerArtifact = resolveRecordArtifactPointer(record, 'answer');
     const knownPaths = [
       record.grading_path,
       record.timing_path,
@@ -832,12 +1033,14 @@ async function handleEvalFiles(c: C, { searchDir, projectId }: DataContext) {
       record.response_path,
       record.answer_path,
       record.transcript_path,
+      transcriptArtifact.path,
+      answerArtifact.path,
       record.task_dir,
       record.eval_path,
       record.targets_path,
       record.files_path,
       record.graders_path,
-    ].filter((p): p is string => !!p);
+    ].filter((p, index, all): p is string => !!p && all.indexOf(p) === index);
 
     if (knownPaths.length === 0) return c.json({ files: [] });
 
@@ -877,39 +1080,93 @@ async function handleEvalFileContent(c: C, { searchDir, projectId }: DataContext
 
   await ensureRunReadable(searchDir, meta, projectId);
   const baseDir = path.dirname(meta.path);
-  const absolutePath = path.resolve(baseDir, filePath);
-
-  // Security: prevent path traversal — resolved path must be inside baseDir
-  if (
-    !absolutePath.startsWith(path.resolve(baseDir) + path.sep) &&
-    absolutePath !== path.resolve(baseDir)
-  ) {
+  const resolvedFile = resolveReadableRunArtifactFile(baseDir, filePath);
+  if (resolvedFile.error) {
     return c.json({ error: 'Path traversal not allowed' }, 403);
   }
-
-  if (!existsSync(absolutePath) || !statSync(absolutePath).isFile()) {
+  if (!resolvedFile.absolutePath) {
     return c.json({ error: 'File not found' }, 404);
   }
 
   try {
-    const fileContent = readFileSync(absolutePath, 'utf8');
+    const fileContent = readFileSync(resolvedFile.absolutePath, 'utf8');
     if (c.req.query('raw') === '1' || c.req.query('download') === '1') {
-      c.header('Content-Type', inferRawContentType(absolutePath));
+      c.header('Content-Type', inferRawContentType(filePath));
       if (c.req.query('download') === '1') {
         c.header(
           'Content-Disposition',
-          `attachment; filename="${contentDispositionFilename(absolutePath)}"`,
+          `attachment; filename="${contentDispositionFilename(filePath)}"`,
         );
       }
       return c.body(fileContent);
     }
-    const language = inferLanguage(absolutePath);
+    const language = inferLanguage(filePath);
     return c.json({ content: fileContent, language });
   } catch {
     return c.json({ error: 'Failed to read file' }, 500);
   }
 }
 
+/**
+ * Serves the transcript artifact for one eval as JSON with a `status` field:
+ * `missing`/`unsupported` (no transcript path on the record), `dangling` (path
+ * recorded but no readable file inside the run workspace) or `ok` (content,
+ * plus answer content when readable). Unknown run/eval → 404; failures → 500.
+ */
+async function handleEvalTranscript(c: C, { searchDir, projectId }: DataContext) {
+  const filename = c.req.param('filename') ?? '';
+  const evalId = c.req.param('evalId');
+  const meta = await findRunById(searchDir, filename, projectId);
+  if (!meta) return c.json({ error: 'Run not found' }, 404);
+
+  try {
+    const records = await parseManifestForMeta(searchDir, meta, projectId);
+    const record = records.find((r) => r.test_id === evalId);
+    if (!record) return c.json({ error: 'Eval not found' }, 404);
+
+    const baseDir = path.dirname(meta.path);
+    const transcript = resolveRecordArtifactPointer(record, 'transcript');
+    const answer = resolveRecordArtifactPointer(record, 'answer');
+    const pointerField = transcript.description ? { pointer: transcript.description } : {};
+
+    if (!transcript.path) {
+      return c.json({
+        status: transcript.unsupportedReason ? 'unsupported' : 'missing',
+        message: transcript.unsupportedReason ?? missingTranscriptMessage(),
+        ...pointerField,
+      });
+    }
+
+    const resolved = resolveReadableRunArtifactFile(baseDir, transcript.path);
+    if (resolved.error || !resolved.absolutePath) {
+      // Either the path escapes the workspace (error set) or no file exists there.
+      const refMessage = transcript.ref ? ` on ${transcript.ref}` : '';
+      return c.json({
+        status: 'dangling',
+        transcript_path: transcript.path,
+        message:
+          resolved.error ||
+          `Transcript artifact pointer${refMessage} is present, but ${transcript.path} is not available in this run workspace.`,
+        ...pointerField,
+      });
+    }
+
+    const content = readFileSync(resolved.absolutePath, 'utf8');
+    const answerContent = readOptionalRunArtifactText(baseDir, answer);
+    return c.json({
+      status: 'ok',
+      transcript_path: transcript.path,
+      content,
+      language: inferLanguage(transcript.path),
+      ...(answer.path && { answer_path: answer.path }),
+      ...(answerContent !== undefined && { answer_content: answerContent }),
+      ...pointerField,
+    });
+  } catch {
+    return c.json({ error: 'Failed to load transcript artifact' }, 500);
+  }
+}
+
 async function handleExperiments(c: C, { searchDir, agentvDir, projectId }: DataContext) {
   const { runs: metas } = await listMergedResultFiles(searchDir, undefined, projectId);
   const { threshold: pass_threshold } = loadStudioConfig(agentvDir);
@@ -1025,6 +1282,8 @@ async function handleCompare(c: C, { searchDir, agentvDir, projectId }: DataCont
     remote_tags?: string[];
     pending_tags?: string[];
     metadata_dirty?: boolean;
+    final_state: RunFinalState;
+    oplog_watermark: RunOplogWatermark;
     source: 'local' | 'remote';
     eval_count: number;
     quality_count: number;
@@ -1472,8 +1731,14 @@ async function handleRunTagsPut(c: C, { searchDir, projectId }: DataContext) {
     }
 
     const entry = writeRunTags(meta.path, tags as string[]);
+    const responseState = localTagMutationResponse({
+      tags: entry?.tags ?? [],
+      updatedAt: entry?.updated_at,
+      watermark: entry?.oplog_watermark,
+    });
     return c.json({
       tags: entry?.tags ?? [],
+      ...responseState,
       updated_at: entry?.updated_at ?? new Date().toISOString(),
     });
   } catch (err) {
@@ -1494,8 +1759,18 @@ async function handleRunTagsDelete(c: C, { searchDir, projectId }: DataContext)
       });
     }
 
-    deleteRunTags(meta.path);
-    return c.json({ ok: true });
+    const entry = writeRunTags(meta.path, []);
+    const responseState = localTagMutationResponse({
+      tags: entry.tags,
+      updatedAt: entry.updated_at,
+      watermark: entry.oplog_watermark,
+    });
+    return c.json({
+      ok: true,
+      tags: entry.tags,
+      ...responseState,
+      updated_at: entry.updated_at,
+    });
   } catch (err) {
     return c.json({ error: (err as Error).message }, remoteMetadataErrorStatus(err));
   }
@@ -1831,6 +2106,8 @@ export function createApp(
       remote_tags?: string[];
       pending_tags?: string[];
       metadata_dirty?: boolean;
+      final_state: RunFinalState;
+      oplog_watermark: RunOplogWatermark;
       source: 'local' | 'remote';
       project_id: string;
       project_name: string;
@@ -1946,6 +2223,9 @@ export function createApp(
     handleCategorySuites(c, defaultCtx),
   );
   app.get('/api/runs/:filename/evals/:evalId', (c) => handleEvalDetail(c, defaultCtx));
+  app.get('/api/runs/:filename/evals/:evalId/transcript', (c) =>
+    handleEvalTranscript(c, defaultCtx),
+  );
   app.get('/api/runs/:filename/evals/:evalId/files', (c) => handleEvalFiles(c, defaultCtx));
   app.get('/api/runs/:filename/evals/:evalId/files/*', (c) => handleEvalFileContent(c, defaultCtx));
   app.get('/api/experiments', (c) => handleExperiments(c, defaultCtx));
@@ -1976,11 +2256,11 @@ export function createApp(
         let testCount = m.testCount;
         let executionErrorCount = 0;
         try {
-          const loaded = await loadManifestResultsForMeta(searchDir, m, defaultCtx.projectId);
-          totalCostUsd = loaded.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
-          if (loaded.length > 0) {
+          const records = await loadLightweightResultsForMeta(searchDir, m, defaultCtx.projectId);
+          totalCostUsd = records.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
+          if (records.length > 0) {
             const qualitySummary = summarizeQualityResults(
-              loaded,
+              records,
               loadStudioConfig(agentvDir).threshold,
             );
             testCount = qualitySummary.totalCount;
@@ -2064,6 +2344,9 @@ export function createApp(
   app.get('/api/projects/:projectId/runs/:filename/evals/:evalId', (c) =>
     withProject(c, handleEvalDetail),
   );
+  app.get('/api/projects/:projectId/runs/:filename/evals/:evalId/transcript', (c) =>
+    withProject(c, handleEvalTranscript),
+  );
   app.get('/api/projects/:projectId/runs/:filename/evals/:evalId/files', (c) =>
     withProject(c, handleEvalFiles),
   );
@@ -2283,19 +2566,19 @@ export const resultsServeCommand = command({
       // project's configured run workspace and fall back to the empty state.
       if (source) {
         sourceFile = await resolveSourceFile(source, cwd);
-        results = loadManifestResults(sourceFile);
+        results = loadManifestResults(sourceFile, { hydrateTranscriptTrace: false });
       } else {
         // Auto-discover: run cache -> directory scan -> empty state
         const cache = await loadRunCache(cwd);
         const cachedFile = cache ? resolveRunCacheFile(cache) : '';
         if (cachedFile && existsSync(cachedFile)) {
           sourceFile = cachedFile;
-          results = loadManifestResults(cachedFile);
+          results = loadManifestResults(cachedFile, { hydrateTranscriptTrace: false });
         } else {
           const metas = listResultFiles(cwd, 1);
           if (metas.length > 0) {
             sourceFile = metas[0].path;
-            results = loadManifestResults(metas[0].path);
+            results = loadManifestResults(metas[0].path, { hydrateTranscriptTrace: false });
           }
           // If no metas, results stays empty — dashboard shows welcome state
         }
diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts
index 14ab4dfdd..71c1e88e9 100644
--- a/apps/cli/test/commands/eval/artifact-writer.test.ts
+++ b/apps/cli/test/commands/eval/artifact-writer.test.ts
@@ -1,11 +1,19 @@
 import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
+import { createHash } from 'node:crypto';
 import { mkdir, readFile, readdir, rm, writeFile } from 'node:fs/promises';
 import path from 'node:path';
 
 import {
+  AGENTV_RESULTS_ARTIFACTS_REF,
+  CANONICAL_TRACE_ARTIFACT_PATH,
+  CANONICAL_TRANSCRIPT_ARTIFACT_PATH,
+  EXECUTION_TRACE_SCHEMA_VERSION,
   type EvalTest,
   type EvaluationResult,
   type GraderResult,
+  TRACE_JSON_MEDIA_TYPE,
+  TRANSCRIPT_JSONL_MEDIA_TYPE,
+  TRANSCRIPT_SCHEMA_VERSION,
   TraceEnvelopeWireSchema,
   buildTraceFromMessages,
   fromTraceEnvelopeWire,
@@ -28,6 +36,8 @@ import {
   writeArtifacts,
   writeArtifactsFromResults,
 } from '../../../src/commands/eval/artifact-writer.js';
+import { prepareResultForJsonl } from '../../../src/commands/eval/run-eval.js';
+import { toSnakeCaseDeep } from '../../../src/utils/case-conversion.js';
 
 function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult {
   const result = {
@@ -72,6 +82,10 @@ function makeEvaluatorResult(overrides: Partial<GraderResult> = {}): GraderResul
   } as GraderResult;
 }
 
+function sha256Hex(content: Buffer): string { // lowercase hex SHA-256 of the given bytes
+  return createHash('sha256').update(content).digest().toString('hex');
+}
+
 // ---------------------------------------------------------------------------
 // Grading artifact
 // ---------------------------------------------------------------------------
@@ -587,6 +601,59 @@ describe('parseJsonlResults', () => {
     expect(results[0].trace.toolCalls).toEqual({ rg: 1 });
   });
 
+  it('rejects camelCase artifact pointer rows for the new wire field', () => {
+    const content = `${JSON.stringify({
+      test_id: 'pointer-row',
+      target: 'codex',
+      score: 1,
+      artifactPointers: {
+        transcript: {
+          ref: 'agentv/artifacts/v1',
+          key: 'transcripts/pointer-row/outputs/transcript.jsonl',
+          object_version: 'sha256:test',
+          path: 'pointer-row/outputs/transcript.jsonl',
+          sha256: 'test',
+          size: 1,
+          schema_version: 'agentv.transcript.v1',
+          media_type: 'application/x-ndjson',
+          family: 'transcripts',
+        },
+      },
+    })}\n`;
+
+    expect(() => parseJsonlResults(content)).toThrow(/Use "artifact_pointers"/);
+  });
+
+  it('does not treat parsed raw provider log pointers as fresh source artifacts', () => {
+    const content = `${JSON.stringify({
+      test_id: 'raw-log-case',
+      target: 'codex',
+      score: 1,
+      output: 'done',
+      raw_provider_log_path: 'raw-log-case/outputs/raw/provider.log',
+    })}\n`;
+
+    const results = parseJsonlResults(content);
+
+    expect(results).toHaveLength(1);
+    expect(results[0].rawProviderLogPath).toBeUndefined();
+  });
+
+  it('preserves raw provider log pointer metadata at the per-case JSONL boundary', () => {
+    const rawLogPath = path.join(import.meta.dir, '.test-provider-source.log');
+    const result = makeResult({
+      testId: 'raw-log-jsonl-case',
+      rawProviderLogPath: rawLogPath,
+    });
+
+    const prepared = prepareResultForJsonl(result, { outputMessages: 1 });
+    const wire = toSnakeCaseDeep(prepared) as Record<string, unknown>;
+
+    expect(prepared.rawProviderLogPath).toBe(rawLogPath);
+    expect(wire.raw_provider_log_path).toBe(rawLogPath);
+    expect(wire).not.toHaveProperty('raw_provider_log');
+  });
+
   it('handles empty content', () => {
     expect(parseJsonlResults('')).toHaveLength(0);
   });
@@ -828,9 +895,8 @@ describe('writeArtifactsFromResults', () => {
 
     await writeArtifactsFromResults(results, testDir);
 
-    const transcriptLines = (
-      await readFile(path.join(testDir, 'transcript-case', 'outputs', 'transcript.jsonl'), 'utf8')
-    )
+    const transcriptPath = path.join(testDir, 'transcript-case', 'outputs', 'transcript.jsonl');
+    const transcriptLines = (await readFile(transcriptPath, 'utf8'))
       .trim()
       .split('\n')
       .map((line) => JSON.parse(line));
@@ -911,7 +977,8 @@ describe('writeArtifactsFromResults', () => {
     expect(transcriptLines[1]).not.toHaveProperty('providerSessionId');
     expect(envelope.schema_version).toBe('agentv.trace.v1');
     expect(envelope.artifact_id).toMatch(/^execution-trace-/);
-    expect(envelope.artifacts.trace_path).toBe('outputs/trace.json');
+    expect(envelope.artifacts.trace_path).toBe(CANONICAL_TRACE_ARTIFACT_PATH);
+    expect(envelope.artifacts.transcript_path).toBe(CANONICAL_TRANSCRIPT_ARTIFACT_PATH);
     expect(envelope.artifacts).not.toHaveProperty('execution_trace_path');
     expect(envelope.eval.test_id).toBe('transcript-case');
     expect(envelope.trace.spans.map((span) => span.attributes['gen_ai.operation.name'])).toEqual([
@@ -919,12 +986,94 @@ describe('writeArtifactsFromResults', () => {
       'chat',
       'execute_tool',
     ]);
+    await expect(
+      readFile(path.join(testDir, 'transcript-case', 'outputs', 'transcript.json'), 'utf8'),
+    ).rejects.toThrow();
 
     const indexLine = JSON.parse(
       (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
     );
     expect(indexLine.transcript_path).toBe('transcript-case/outputs/transcript.jsonl');
+    expect(indexLine.transcript_path.endsWith(CANONICAL_TRANSCRIPT_ARTIFACT_PATH)).toBe(true);
     expect(indexLine).not.toHaveProperty('trace_path');
+
+    const traceContent = await readFile(
+      path.join(testDir, 'transcript-case', 'outputs', 'trace.json'),
+    );
+    const transcriptContent = await readFile(transcriptPath);
+    const traceSha = sha256Hex(traceContent);
+    const transcriptSha = sha256Hex(transcriptContent);
+
+    expect(indexLine.artifact_pointers.trace).toMatchObject({
+      ref: AGENTV_RESULTS_ARTIFACTS_REF,
+      key: 'traces/transcript-case/outputs/trace.json',
+      object_version: `sha256:${traceSha}`,
+      path: 'transcript-case/outputs/trace.json',
+      sha256: traceSha,
+      size: traceContent.byteLength,
+      schema_version: EXECUTION_TRACE_SCHEMA_VERSION,
+      media_type: TRACE_JSON_MEDIA_TYPE,
+      family: 'traces',
+    });
+    expect(indexLine.artifact_pointers.transcript).toMatchObject({
+      ref: AGENTV_RESULTS_ARTIFACTS_REF,
+      key: 'transcripts/transcript-case/outputs/transcript.jsonl',
+      object_version: `sha256:${transcriptSha}`,
+      path: 'transcript-case/outputs/transcript.jsonl',
+      sha256: transcriptSha,
+      size: transcriptContent.byteLength,
+      schema_version: TRANSCRIPT_SCHEMA_VERSION,
+      media_type: TRANSCRIPT_JSONL_MEDIA_TYPE,
+      family: 'transcripts',
+    });
+  });
+
+  it('copies optional raw provider logs as non-canonical evidence', async () => {
+    const rawLogPath = path.join(testDir, 'provider-source.log');
+    const rawLog = [
+      '# provider-native stream log',
+      '{"time":"00:00","data":{"camelCaseProviderKey":true,"toolInput":{"filePath":"src/index.ts"}}}',
+      '',
+    ].join('\n');
+    await mkdir(testDir, { recursive: true });
+    await writeFile(rawLogPath, rawLog, 'utf8');
+
+    const results = [
+      makeResult({
+        testId: 'raw-log-case',
+        target: 'codex',
+        output: 'Raw log copied',
+        rawProviderLogPath: rawLogPath,
+      }),
+    ];
+
+    await writeArtifactsFromResults(results, testDir);
+
+    const copiedRawLogPath = path.join(testDir, 'raw-log-case', 'outputs', 'raw', 'provider.log');
+    expect(await readFile(copiedRawLogPath, 'utf8')).toBe(rawLog);
+
+    const transcriptPath = path.join(testDir, 'raw-log-case', 'outputs', 'transcript.jsonl');
+    await expect(readFile(transcriptPath, 'utf8')).resolves.toContain(
+      '"schema_version":"agentv.transcript.v1"',
+    );
+    await expect(
+      readFile(path.join(testDir, 'raw-log-case', 'outputs', 'transcript.json'), 'utf8'),
+    ).rejects.toThrow();
+
+    const envelope = TraceEnvelopeWireSchema.parse(
+      JSON.parse(
+        await readFile(path.join(testDir, 'raw-log-case', 'outputs', 'trace.json'), 'utf8'),
+      ),
+    );
+    expect(envelope.artifacts.raw_provider_log_path).toBe('outputs/raw/provider.log');
+    expect(envelope.artifacts.transcript_path).toBe('outputs/transcript.jsonl');
+
+    const indexLine = JSON.parse(
+      (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
+    );
+    expect(indexLine.raw_provider_log_path).toBe('raw-log-case/outputs/raw/provider.log');
+    expect(indexLine.transcript_path).toBe('raw-log-case/outputs/transcript.jsonl');
+    expect(indexLine).not.toHaveProperty('transcript_json_path');
   });
 
   it('omits per-test transcript links when the execution trace has no transcript rows', async () => {
@@ -945,6 +1094,15 @@ describe('writeArtifactsFromResults', () => {
       (await readFile(path.join(testDir, 'index.jsonl'), 'utf8')).trim(),
     );
     expect(indexLine).not.toHaveProperty('transcript_path');
+    expect(indexLine.artifact_pointers.trace).toMatchObject({
+      ref: AGENTV_RESULTS_ARTIFACTS_REF,
+      key: 'traces/no-transcript-case/outputs/trace.json',
+      path: 'no-transcript-case/outputs/trace.json',
+      schema_version: EXECUTION_TRACE_SCHEMA_VERSION,
+      media_type: TRACE_JSON_MEDIA_TYPE,
+      family: 'traces',
+    });
+    expect(indexLine.artifact_pointers).not.toHaveProperty('transcript');
 
     const envelope = TraceEnvelopeWireSchema.parse(
       JSON.parse(
diff --git a/apps/cli/test/commands/results/combine.test.ts b/apps/cli/test/commands/results/combine.test.ts
index 9d5dfc5d3..889ef84d0 100644
--- a/apps/cli/test/commands/results/combine.test.ts
+++ b/apps/cli/test/commands/results/combine.test.ts
@@ -14,6 +14,13 @@ function toJsonl(...records: object[]): string {
   return `${records.map((record) => JSON.stringify(record)).join('\n')}\n`;
 }
 
+function readIndex(filePath: string): Record<string, unknown>[] { // parses a JSONL index file into one record per non-blank line
+  return readFileSync(filePath, 'utf8')
+    .split('\n')
+    .filter((line) => line.trim() !== '') // skip blank lines so an empty file yields [] instead of throwing in JSON.parse('')
+    .map((line) => JSON.parse(line) as Record<string, unknown>);
+}
+
 const result = (overrides: Record<string, unknown> = {}) => ({
   timestamp: '2026-06-01T10:00:00.000Z',
   test_id: 'test-a',
@@ -87,6 +94,108 @@ describe('results combine', () => {
     expect(benchmark.metadata.timestamp).toBe('2026-06-01T10:00:00.000Z');
   });
 
+  it('copies and rewrites artifact pointers when combining runs', () => {
+    const first = seedRun('run-a', [
+      result({
+        artifact_dir: 'demo/test-a',
+        transcript_path: 'demo/test-a/outputs/transcript.jsonl',
+        raw_provider_log_path: 'demo/test-a/outputs/raw/provider.log',
+        artifact_pointers: {
+          trace: {
+            ref: 'agentv/artifacts/v1',
+            key: 'traces/demo/test-a/outputs/trace.json',
+            object_version: 'sha256:trace',
+            path: 'demo/test-a/outputs/trace.json',
+            sha256: 'trace',
+            size: 18,
+            schema_version: 'agentv.trace.v1',
+            media_type: 'application/vnd.agentv.trace.v1+json',
+            family: 'traces',
+          },
+          transcript: {
+            ref: 'agentv/artifacts/v1',
+            key: 'transcripts/demo/test-a/outputs/transcript.jsonl',
+            object_version: 'sha256:transcript',
+            path: 'demo/test-a/outputs/transcript.jsonl',
+            sha256: 'transcript',
+            size: 180,
+            schema_version: 'agentv.transcript.v1',
+            media_type: 'application/x-ndjson',
+            family: 'transcripts',
+          },
+        },
+      }),
+    ]);
+    mkdirSync(path.join(first, 'demo', 'test-a', 'outputs', 'raw'), { recursive: true });
+    writeFileSync(path.join(first, 'demo', 'test-a', 'outputs', 'trace.json'), '{"trace":[]}\n');
+    writeFileSync(
+      path.join(first, 'demo', 'test-a', 'outputs', 'transcript.jsonl'),
+      `${JSON.stringify({
+        schema_version: 'agentv.transcript.v1',
+        test_id: 'test-a',
+        target: 'mock',
+        message_index: 0,
+        role: 'assistant',
+        content: 'Pointer-backed transcript',
+        source: { provider: 'mock', session_id: 'session-a' },
+      })}\n`,
+    );
+    writeFileSync(
+      path.join(first, 'demo', 'test-a', 'outputs', 'raw', 'provider.log'),
+      '{"event":"provider-native"}\n',
+    );
+    const second = seedRun('run-b', [
+      result({
+        timestamp: '2026-06-01T11:00:00.000Z',
+        test_id: 'test-b',
+        grading_path: 'demo/test-b/grading.json',
+        timing_path: 'demo/test-b/timing.json',
+      }),
+    ]);
+    mkdirSync(path.join(second, 'demo', 'test-b'), { recursive: true });
+    writeFileSync(path.join(second, 'demo', 'test-b', 'grading.json'), '{"assertions":[]}\n');
+    writeFileSync(
+      path.join(second, 'demo', 'test-b', 'timing.json'),
+      '{"duration_ms":0,"total_duration_seconds":0,"total_tokens":0,"token_usage":{}}\n',
+    );
+
+    const combined = combineRunSources({
+      cwd: tempDir,
+      sources: buildCombineRunSources([first, second], tempDir),
+      duplicatePolicy: 'error',
+    });
+
+    const [record] = readIndex(combined.manifestPath);
+    expect(record.artifact_dir).toBe('sources/source-1/demo/test-a');
+    expect(record.transcript_path).toBe('sources/source-1/demo/test-a/outputs/transcript.jsonl');
+    expect(record.raw_provider_log_path).toBe(
+      'sources/source-1/demo/test-a/outputs/raw/provider.log',
+    );
+    expect(record.artifact_pointers).toMatchObject({
+      trace: {
+        key: 'traces/sources/source-1/demo/test-a/outputs/trace.json',
+        path: 'sources/source-1/demo/test-a/outputs/trace.json',
+      },
+      transcript: {
+        key: 'transcripts/sources/source-1/demo/test-a/outputs/transcript.jsonl',
+        path: 'sources/source-1/demo/test-a/outputs/transcript.jsonl',
+      },
+    });
+    expect(
+      existsSync(path.join(combined.runDir, 'sources/source-1/demo/test-a/outputs/trace.json')),
+    ).toBe(true);
+    expect(
+      existsSync(
+        path.join(combined.runDir, 'sources/source-1/demo/test-a/outputs/transcript.jsonl'),
+      ),
+    ).toBe(true);
+    expect(
+      existsSync(
+        path.join(combined.runDir, 'sources/source-1/demo/test-a/outputs/raw/provider.log'),
+      ),
+    ).toBe(true);
+  });
+
   it('errors on duplicate rows unless latest is explicit', () => {
     const first = seedRun('run-a', [result({ timestamp: '2026-06-01T10:00:00.000Z', score: 0.1 })]);
     const second = seedRun('run-b', [
diff --git a/apps/cli/test/commands/results/export.test.ts b/apps/cli/test/commands/results/export.test.ts
index c034089f7..434c45136 100644
--- a/apps/cli/test/commands/results/export.test.ts
+++ b/apps/cli/test/commands/results/export.test.ts
@@ -9,12 +9,18 @@ import type {
   IndexArtifactEntry,
   TimingArtifact,
 } from '../../../src/commands/eval/artifact-writer.js';
+import { parseJsonlResults } from '../../../src/commands/eval/artifact-writer.js';
 import {
+  buildProjectionBundleFromExportedIndex,
   deriveExportRunId,
   deriveOutputDir,
   exportResults,
   loadExportSource,
 } from '../../../src/commands/results/export.js';
+import {
+  buildProjectionBundle,
+  serializeProjectionBundle,
+} from '../../../src/commands/results/projection-bundle.js';
 
 // ── Sample JSONL content (snake_case, matching on-disk format) ──────────
 
@@ -96,6 +102,63 @@ const RESULT_NO_TRACE = {
   duration_ms: 500,
 };
 
+const RESULT_WITH_RAW_PAYLOADS = { // fixture row carrying distinct SECRET_* markers in its prompt, output, tool payloads, and assertion/score evidence
+  timestamp: '2026-03-18T10:00:20.000Z',
+  test_id: 'test-private',
+  suite: 'privacy',
+  score: 0.25,
+  assertions: [
+    {
+      text: 'Avoids private content',
+      passed: false,
+      evidence: 'SECRET_ASSERTION_EVIDENCE',
+    },
+  ],
+  output: 'SECRET_FINAL_OUTPUT',
+  target: 'codex',
+  input: [{ role: 'user', content: 'SECRET_PROMPT_TEXT' }],
+  scores: [
+    {
+      name: 'privacy_review',
+      type: 'llm-grader',
+      score: 0.25,
+      assertions: [
+        {
+          text: 'Avoids private content',
+          passed: false,
+          evidence: 'SECRET_SCORE_EVIDENCE',
+        },
+      ],
+      details: { excerpt: 'SECRET_SCORE_DETAILS' },
+    },
+  ],
+  execution_status: 'quality_failure',
+  duration_ms: 900,
+  trace: {
+    messages: [
+      { role: 'user', content: 'SECRET_PROMPT_TEXT' },
+      {
+        role: 'assistant',
+        content: 'SECRET_FINAL_OUTPUT',
+        tool_calls: [
+          {
+            id: 'tool-call-1',
+            tool: 'shell',
+            input: { command: 'cat SECRET_TOOL_ARGUMENTS' },
+            output: 'SECRET_TOOL_RESULT',
+            status: 'ok',
+          },
+        ],
+      },
+    ],
+    events: [],
+    event_count: 2,
+    tool_calls: { shell: 1 },
+    error_count: 0,
+    llm_call_count: 1,
+  },
+};
+
 function toJsonl(...records: object[]): string {
   return `${records.map((r) => JSON.stringify(r)).join('\n')}\n`;
 }
@@ -184,6 +247,127 @@ describe('results export', () => {
     expect(deriveExportRunId(path.join(tempDir, 'legacy-results.jsonl'))).toBe('legacy-results');
   });
 
+  it('builds deterministic metadata-only projection bundle output for dry-run use', () => {
+    const sourceFile = path.join(tempDir, 'runs', 'privacy-run', 'index.jsonl');
+    const [result] = parseJsonlResults(toJsonl(RESULT_WITH_RAW_PAYLOADS));
+
+    const first = buildProjectionBundle([result], {
+      sourceFile,
+      runId: 'privacy-run',
+      cwd: tempDir,
+      duplicatePolicy: 'update',
+    });
+    const second = buildProjectionBundle([result], {
+      sourceFile,
+      runId: 'privacy-run',
+      cwd: tempDir,
+      duplicatePolicy: 'update',
+    });
+    const serialized = serializeProjectionBundle(first);
+
+    expect(serialized).toBe(serializeProjectionBundle(second));
+    expect(first.content_policy).toMatchObject({
+      raw_content: 'excluded',
+      raw_content_opt_in: false,
+      default_capture: 'metadata',
+    });
+    expect(first.entries[0].artifact_refs).toMatchObject({
+      status: 'planned_export',
+      timing_path: 'privacy/test-private/timing.json',
+    });
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('input_path');
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('output_path');
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('answer_path');
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('response_path');
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('transcript_path');
+    expect(first.entries[0].artifact_refs).not.toHaveProperty('trace_path');
+    expect(first.entries[0].feedback).not.toHaveProperty('grading_path');
+    expect(first.entries[0].trace).not.toHaveProperty('envelope_ref');
+    expect(first.entries[0].trace_envelope).not.toHaveProperty('artifacts');
+    expect(first.entries[0].projection_identity.dimensions.run_id).toBe('privacy-run');
+    expect(first.entries[0].trace_envelope.trace.spans.length).toBeGreaterThan(0);
+    expect(first.entries[0].feedback.scores?.[0]).not.toHaveProperty('evidence');
+    expect(serialized).not.toContain('SECRET_PROMPT_TEXT');
+    expect(serialized).not.toContain('SECRET_FINAL_OUTPUT');
+    expect(serialized).not.toContain('SECRET_TOOL_ARGUMENTS');
+    expect(serialized).not.toContain('SECRET_TOOL_RESULT');
+    expect(serialized).not.toContain('SECRET_SCORE_EVIDENCE');
+  });
+
+  it('builds projection bundles when grader scores omit assertion arrays', () => {
+    const sourceFile = path.join(tempDir, 'runs', 'legacy-grader-run', 'index.jsonl');
+    const [result] = parseJsonlResults(
+      toJsonl({
+        ...RESULT_FULL,
+        scores: [
+          {
+            name: 'legacy_grader',
+            type: 'llm-grader',
+            score: 1,
+          },
+        ],
+      }),
+    );
+
+    const bundle = buildProjectionBundle([result], {
+      sourceFile,
+      runId: 'legacy-grader-run',
+      cwd: tempDir,
+    });
+
+    expect(bundle.entries[0].feedback.scores?.[0]).toMatchObject({
+      name: 'legacy_grader',
+      type: 'llm-grader',
+      score: 1,
+    });
+    expect(bundle.entries[0].trace_envelope.scores?.[0]).toMatchObject({
+      name: 'legacy_grader',
+      type: 'llm-grader',
+      score: 1,
+    });
+  });
+
+  it('includes raw prompt, output, tool payloads, and score evidence only with opt-in', () => {
+    const sourceFile = path.join(tempDir, 'runs', 'privacy-run', 'index.jsonl');
+    const [result] = parseJsonlResults(toJsonl(RESULT_WITH_RAW_PAYLOADS));
+
+    const bundle = buildProjectionBundle([result], {
+      sourceFile,
+      runId: 'privacy-run',
+      cwd: tempDir,
+      includeRawContent: true,
+    });
+    const serialized = serializeProjectionBundle(bundle);
+
+    expect(bundle.content_policy).toMatchObject({
+      raw_content: 'included',
+      raw_content_opt_in: true,
+      default_capture: 'full',
+    });
+    expect(bundle.entries[0].capture).toMatchObject({
+      content: 'full',
+      redaction_level: 'none',
+    });
+    expect(bundle.entries[0].artifact_refs).toMatchObject({
+      status: 'planned_export',
+      input_path: 'privacy/test-private/input.md',
+      output_path: 'privacy/test-private/outputs/answer.md',
+      answer_path: 'privacy/test-private/outputs/answer.md',
+      response_path: 'privacy/test-private/outputs/response.md',
+      trace_path: 'privacy/test-private/outputs/trace.json',
+    });
+    expect(bundle.entries[0].trace.envelope_ref).toBe('privacy/test-private/outputs/trace.json');
+    expect(bundle.entries[0].trace_envelope.artifacts).toBeDefined();
+    expect(bundle.entries[0].feedback.grading_path).toBe('privacy/test-private/grading.json');
+    expect(bundle.entries[0].raw_content).toBeDefined();
+    expect(bundle.entries[0].feedback.scores?.[0]).toHaveProperty('evidence');
+    expect(serialized).toContain('SECRET_PROMPT_TEXT');
+    expect(serialized).toContain('SECRET_FINAL_OUTPUT');
+    expect(serialized).toContain('SECRET_TOOL_ARGUMENTS');
+    expect(serialized).toContain('SECRET_TOOL_RESULT');
+    expect(serialized).toContain('SECRET_SCORE_EVIDENCE');
+  });
+
   it('should create benchmark.json matching artifact-writer schema', async () => {
     const outputDir = path.join(tempDir, 'output');
     const content = toJsonl(RESULT_FULL, RESULT_PARTIAL);
@@ -319,6 +503,34 @@ describe('results export', () => {
     expect(readAnswer(outputDir, RESULT_FULL)).toBe('Hello, Alice!');
   });
 
+  it('builds projection bundles from emitted skipped artifacts for duplicate policy skip', async () => {
+    const sourceFile = path.join(tempDir, 'runs', 'retry-run', 'index.jsonl');
+    const outputDir = path.join(tempDir, 'output');
+    const updated = { ...RESULT_FULL, output: 'Skipped answer.' };
+
+    await exportResults(sourceFile, toJsonl(RESULT_FULL), outputDir, {
+      duplicatePolicy: 'update',
+    });
+    await exportResults(sourceFile, toJsonl(updated), outputDir, {
+      duplicatePolicy: 'skip',
+    });
+
+    const bundle = buildProjectionBundleFromExportedIndex({
+      sourceFile,
+      outputDir,
+      cwd: tempDir,
+      includeRawContent: true,
+      duplicatePolicy: 'skip',
+    });
+
+    expect(bundle.entries[0].artifact_refs.status).toBe('emitted');
+    expect(bundle.entries[0].raw_content?.output).toBe('Hello, Alice!');
+    expect(serializeProjectionBundle(bundle)).not.toContain('Skipped answer.');
+    expect(bundle.entries[0].trace_envelope.projection_identity).toEqual(
+      readIndex(outputDir)[0].projection_identity,
+    );
+  });
+
   it('fails duplicate projection artifacts when duplicate policy is error', async () => {
     const sourceFile = path.join(tempDir, 'runs', 'retry-run', 'index.jsonl');
     const outputDir = path.join(tempDir, 'output');
diff --git a/apps/cli/test/commands/results/remote-metadata.test.ts b/apps/cli/test/commands/results/remote-metadata.test.ts
index fb66e430b..44fe86dbc 100644
--- a/apps/cli/test/commands/results/remote-metadata.test.ts
+++ b/apps/cli/test/commands/results/remote-metadata.test.ts
@@ -11,6 +11,7 @@ import {
   readRemoteRunTags,
   writeRemoteRunTags,
 } from '../../../src/commands/results/remote-metadata.js';
+import { RUN_OPLOG_REF } from '../../../src/commands/results/run-oplog.js';
 
 const RUN_TIMESTAMP = '2026-06-06T10-00-00-000Z';
 
@@ -33,7 +34,10 @@ function git(cmd: string, cwd: string): string {
   }).trim();
 }
 
-function seedRepo(repoDir: string): string {
+function seedRepo(
+  repoDir: string,
+  options?: { readonly artifactTags?: readonly string[] },
+): string {
   git('git init --quiet', repoDir);
   git('git config user.email "test@example.com"', repoDir);
   git('git config user.name "Test User"', repoDir);
@@ -41,10 +45,17 @@ function seedRepo(repoDir: string): string {
   const runDir = path.join(repoDir, 'runs', 'default', RUN_TIMESTAMP);
   mkdirSync(runDir, { recursive: true });
   writeFileSync(path.join(runDir, 'index.jsonl'), '{"test_id":"alpha","score":1}\n');
-  writeFileSync(
-    path.join(runDir, 'tags.json'),
-    `${JSON.stringify({ tags: ['remote-baseline'], updated_at: '2026-06-06T09:00:00.000Z' }, null, 2)}\n`,
-  );
+  const artifactTags = options?.artifactTags ?? ['remote-baseline'];
+  if (artifactTags.length > 0) {
+    writeFileSync(
+      path.join(runDir, 'tags.json'),
+      `${JSON.stringify(
+        { tags: artifactTags, updated_at: '2026-06-06T09:00:00.000Z' },
+        null,
+        2,
+      )}\n`,
+    );
+  }
   git('git add runs', repoDir);
   git('git commit --quiet -m "seed remote run"', repoDir);
   return path.join(runDir, 'index.jsonl');
@@ -72,6 +83,8 @@ describe('remote metadata tags', () => {
     expect(state.remoteTags).toEqual(['remote-baseline']);
     expect(state.pendingTags).toEqual(['pending', 'remote-baseline']);
     expect(state.dirty).toBe(true);
+    expect(state.oplogWatermark.ref).toBe(RUN_OPLOG_REF);
+    expect(state.oplogWatermark.operation_id).toBeString();
     expect(state.metadataPath).toContain(
       path.join('metadata', 'runs', 'default', RUN_TIMESTAMP, 'tags.json'),
     );
@@ -83,6 +96,7 @@ describe('remote metadata tags', () => {
     expect(reloaded.tags).toEqual(['pending', 'remote-baseline']);
     expect(reloaded.pendingTags).toEqual(['pending', 'remote-baseline']);
     expect(reloaded.dirty).toBe(true);
+    expect(reloaded.oplogWatermark.operation_id).toBe(state.oplogWatermark.operation_id);
   });
 
   it('uses committed metadata overlays as the clean remote baseline', () => {
@@ -98,6 +112,7 @@ describe('remote metadata tags', () => {
     expect(reloaded.remoteTags).toEqual(['accepted']);
     expect(reloaded.pendingTags).toBeUndefined();
     expect(reloaded.dirty).toBe(false);
+    expect(reloaded.oplogWatermark.ref).toBe(RUN_OPLOG_REF);
   });
 
   it('persists clearing remote tags as an empty pending overlay', () => {
@@ -112,6 +127,25 @@ describe('remote metadata tags', () => {
     expect(readFileSync(state.metadataPath, 'utf8')).toContain('"tags": []');
   });
 
+  it('records an explicit clear watermark when the remote baseline is already empty', () => {
+    const manifestPath = seedRepo(repoDir, { artifactTags: [] });
+
+    const state = writeRemoteRunTags(repoDir, manifestPath, []);
+    const metadata = JSON.parse(readFileSync(state.metadataPath, 'utf8')) as {
+      tags: string[];
+      oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+    };
+
+    expect(state.tags).toEqual([]);
+    expect(state.remoteTags).toEqual([]);
+    expect(state.pendingTags).toEqual([]);
+    expect(state.dirty).toBe(true);
+    expect(state.oplogWatermark.ref).toBe(RUN_OPLOG_REF);
+    expect(state.oplogWatermark.operation_id).toBeString();
+    expect(metadata.tags).toEqual([]);
+    expect(metadata.oplog_watermark.operation_id).toBe(state.oplogWatermark.operation_id);
+  });
+
   it('rejects writes when the configured results path is not a git checkout', () => {
     const runDir = path.join(repoDir, 'runs', 'default', RUN_TIMESTAMP);
     mkdirSync(runDir, { recursive: true });
diff --git a/apps/cli/test/commands/results/run-oplog.test.ts b/apps/cli/test/commands/results/run-oplog.test.ts
new file mode 100644
index 000000000..ff6990bb0
--- /dev/null
+++ b/apps/cli/test/commands/results/run-oplog.test.ts
@@ -0,0 +1,108 @@
+import { describe, expect, it } from 'bun:test';
+import { execFileSync } from 'node:child_process';
+
+import {
+  RUN_OPERATION_SCHEMA_VERSION,
+  RUN_OPLOG_REF,
+  buildRunIdFromRelativePath,
+  createRunTagsSetOperation,
+  materializeRunState,
+  watermarkFromRunOperation,
+} from '../../../src/commands/results/run-oplog.js';
+
+const PRIMARY_RESULTS_REF = 'agentv/results/v1';
+const ARTIFACTS_REF = 'agentv/artifacts/v1';
+
+function refsHavePrefixConflict(left: string, right: string): boolean {
+  return left === right || left.startsWith(`${right}/`) || right.startsWith(`${left}/`);
+}
+
+function isValidGitBranchRef(ref: string): boolean {
+  try {
+    execFileSync('git', ['check-ref-format', `refs/heads/${ref}`], { stdio: 'ignore' });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+describe('run operation log contract', () => {
+  it('defines the stable oplog ref', () => {
+    expect(RUN_OPLOG_REF).toBe('agentv/oplog/v1');
+  });
+
+  it('keeps results, artifacts, and oplog refs non-prefix-conflicting', () => {
+    const refs = [PRIMARY_RESULTS_REF, ARTIFACTS_REF, RUN_OPLOG_REF];
+
+    for (const left of refs) {
+      expect(isValidGitBranchRef(left)).toBe(true);
+    }
+
+    for (const [index, left] of refs.entries()) {
+      for (const right of refs.slice(index + 1)) {
+        expect(refsHavePrefixConflict(left, right)).toBe(false);
+      }
+    }
+  });
+
+  it('builds a typed tag replacement operation envelope', () => {
+    const operation = createRunTagsSetOperation({
+      runId: 'smoke::2026-06-21T10-00-00-000Z',
+      runPath: 'smoke/2026-06-21T10-00-00-000Z',
+      tags: ['baseline', 'reviewed'],
+      actor: { kind: 'dashboard', id: 'local' },
+      authoredAt: '2026-06-21T10:15:00.000Z',
+      operationId: 'op-123',
+    });
+
+    expect(operation).toEqual({
+      schema_version: RUN_OPERATION_SCHEMA_VERSION,
+      operation_id: 'op-123',
+      operation_type: 'run.tags.set',
+      authored_at: '2026-06-21T10:15:00.000Z',
+      actor: { kind: 'dashboard', id: 'local' },
+      subject: {
+        run_id: 'smoke::2026-06-21T10-00-00-000Z',
+        run_path: 'smoke/2026-06-21T10-00-00-000Z',
+      },
+      payload: {
+        tags: ['baseline', 'reviewed'],
+      },
+    });
+  });
+
+  it('materializes final run state from tags and an operation watermark', () => {
+    const operation = createRunTagsSetOperation({
+      runId: '2026-06-21T10-00-00-000Z',
+      tags: ['accepted'],
+      authoredAt: '2026-06-21T10:15:00.000Z',
+      operationId: 'op-456',
+    });
+
+    expect(
+      materializeRunState({
+        tags: operation.payload.tags,
+        watermark: watermarkFromRunOperation(operation),
+      }),
+    ).toEqual({
+      final_state: {
+        lifecycle: 'active',
+        tags: ['accepted'],
+      },
+      oplog_watermark: {
+        ref: RUN_OPLOG_REF,
+        operation_id: 'op-456',
+        updated_at: '2026-06-21T10:15:00.000Z',
+      },
+    });
+  });
+
+  it('derives run IDs from results branch paths', () => {
+    expect(buildRunIdFromRelativePath('default/2026-06-21T10-00-00-000Z')).toBe(
+      '2026-06-21T10-00-00-000Z',
+    );
+    expect(buildRunIdFromRelativePath('smoke/2026-06-21T10-00-00-000Z')).toBe(
+      'smoke::2026-06-21T10-00-00-000Z',
+    );
+  });
+});
diff --git a/apps/cli/test/commands/results/run-tags.test.ts b/apps/cli/test/commands/results/run-tags.test.ts
new file mode 100644
index 000000000..23dabb79c
--- /dev/null
+++ b/apps/cli/test/commands/results/run-tags.test.ts
@@ -0,0 +1,52 @@
+import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
+import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+
+import { RUN_OPLOG_REF } from '../../../src/commands/results/run-oplog.js';
+import {
+  deleteRunTags,
+  readRunTags,
+  runTagsPath,
+  writeRunTags,
+} from '../../../src/commands/results/run-tags.js';
+
+describe('run tags sidecar', () => {
+  let tempDir: string;
+  let manifestPath: string;
+
+  beforeEach(() => {
+    tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-run-tags-'));
+    const runDir = path.join(tempDir, '.agentv', 'results', 'runs', 'default', '2026-clear-tags');
+    mkdirSync(runDir, { recursive: true });
+    manifestPath = path.join(runDir, 'index.jsonl');
+    writeFileSync(manifestPath, '{"test_id":"alpha","score":1}\n', 'utf8');
+  });
+
+  afterEach(() => {
+    rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  it('records empty tags as a clear tombstone with an oplog watermark', () => {
+    writeRunTags(manifestPath, ['baseline']);
+
+    const cleared = writeRunTags(manifestPath, []);
+    const reloaded = readRunTags(manifestPath);
+
+    expect(existsSync(runTagsPath(manifestPath))).toBe(true);
+    expect(cleared.tags).toEqual([]);
+    expect(cleared.oplog_watermark?.ref).toBe(RUN_OPLOG_REF);
+    expect(cleared.oplog_watermark?.operation_id).toBeString();
+    expect(reloaded).toEqual(cleared);
+    expect(readFileSync(runTagsPath(manifestPath), 'utf8')).toContain('"tags": []');
+  });
+
+  it('keeps physical sidecar deletion explicit', () => {
+    writeRunTags(manifestPath, []);
+
+    deleteRunTags(manifestPath);
+
+    expect(existsSync(runTagsPath(manifestPath))).toBe(false);
+    expect(readRunTags(manifestPath)).toBeUndefined();
+  });
+});
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 834d041ca..389089eaf 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -1,6 +1,14 @@
 import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test';
 import { execFileSync, execSync } from 'node:child_process';
-import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  symlinkSync,
+  writeFileSync,
+} from 'node:fs';
 import os from 'node:os';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
@@ -8,6 +16,7 @@ import { fileURLToPath } from 'node:url';
 
 import { addProject, saveProjectRegistry } from '@agentv/core';
 
+import { RUN_OPLOG_REF } from '../../../src/commands/results/run-oplog.js';
 import {
   createApp,
   loadResults,
@@ -934,7 +943,13 @@ describe('serve app', () => {
 
       expect(res.status).toBe(200);
       const data = (await res.json()) as {
-        runs: Array<{ filename: string; source: string; on_remote: boolean }>;
+        runs: Array<{
+          filename: string;
+          source: string;
+          on_remote: boolean;
+          final_state: { lifecycle: string; tags: string[] };
+          oplog_watermark: { ref: string };
+        }>;
       };
       expect(data.runs).toHaveLength(1);
       // A local-only run (no remote configured) is not on the remote branch.
@@ -942,6 +957,159 @@ describe('serve app', () => {
         filename,
         source: 'local',
         on_remote: false,
+        final_state: {
+          lifecycle: 'active',
+          tags: [],
+        },
+        oplog_watermark: {
+          ref: RUN_OPLOG_REF,
+        },
+      });
+    });
+
+    it('exposes materialized final state and oplog watermark for local run tags', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
+      mkdirSync(runsDir, { recursive: true });
+      const filename = '2026-03-25T10-00-00-000Z';
+      const runDir = path.join(runsDir, filename);
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(RESULT_A));
+      writeFileSync(
+        path.join(runDir, 'tags.json'),
+        `${JSON.stringify(
+          {
+            tags: ['accepted'],
+            updated_at: '2026-06-21T10:15:00.000Z',
+            oplog_watermark: {
+              ref: RUN_OPLOG_REF,
+              operation_id: 'op-local-tags',
+              updated_at: '2026-06-21T10:15:00.000Z',
+            },
+          },
+          null,
+          2,
+        )}\n`,
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const listRes = await app.request('/api/runs');
+      expect(listRes.status).toBe(200);
+      const listData = (await listRes.json()) as {
+        runs: Array<{
+          tags: string[];
+          final_state: { lifecycle: string; tags: string[] };
+          oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+        }>;
+      };
+      expect(listData.runs[0]).toMatchObject({
+        tags: ['accepted'],
+        final_state: {
+          lifecycle: 'active',
+          tags: ['accepted'],
+        },
+        oplog_watermark: {
+          ref: RUN_OPLOG_REF,
+          operation_id: 'op-local-tags',
+          updated_at: '2026-06-21T10:15:00.000Z',
+        },
+      });
+
+      const detailRes = await app.request(`/api/runs/${encodeURIComponent(filename)}`);
+      expect(detailRes.status).toBe(200);
+      const detailData = (await detailRes.json()) as {
+        tags: string[];
+        final_state: { lifecycle: string; tags: string[] };
+        oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+      };
+      expect(detailData).toMatchObject({
+        tags: ['accepted'],
+        final_state: {
+          lifecycle: 'active',
+          tags: ['accepted'],
+        },
+        oplog_watermark: {
+          ref: RUN_OPLOG_REF,
+          operation_id: 'op-local-tags',
+          updated_at: '2026-06-21T10:15:00.000Z',
+        },
+      });
+    });
+
+    it('preserves a local tag clear watermark after DELETE /tags', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
+      mkdirSync(runsDir, { recursive: true });
+      const filename = '2026-03-25T10-30-00-000Z';
+      const runDir = path.join(runsDir, filename);
+      mkdirSync(runDir, { recursive: true });
+      writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(RESULT_A));
+      writeFileSync(
+        path.join(runDir, 'tags.json'),
+        `${JSON.stringify(
+          {
+            tags: ['accepted'],
+            updated_at: '2026-06-21T10:15:00.000Z',
+            oplog_watermark: {
+              ref: RUN_OPLOG_REF,
+              operation_id: 'op-before-clear',
+              updated_at: '2026-06-21T10:15:00.000Z',
+            },
+          },
+          null,
+          2,
+        )}\n`,
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const deleteRes = await app.request(`/api/runs/${encodeURIComponent(filename)}/tags`, {
+        method: 'DELETE',
+      });
+      expect(deleteRes.status).toBe(200);
+      const deleteData = (await deleteRes.json()) as {
+        ok: boolean;
+        tags: string[];
+        final_state: { lifecycle: string; tags: string[] };
+        oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+        updated_at: string;
+      };
+      expect(deleteData.ok).toBe(true);
+      expect(deleteData.tags).toEqual([]);
+      expect(deleteData.final_state).toEqual({
+        lifecycle: 'active',
+        tags: [],
+      });
+      expect(deleteData.oplog_watermark.ref).toBe(RUN_OPLOG_REF);
+      expect(deleteData.oplog_watermark.operation_id).toBeString();
+      expect(deleteData.oplog_watermark.operation_id).not.toBe('op-before-clear');
+      expect(deleteData.oplog_watermark.updated_at).toBe(deleteData.updated_at);
+
+      const tagFile = JSON.parse(readFileSync(path.join(runDir, 'tags.json'), 'utf8')) as {
+        tags: string[];
+        oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+      };
+      expect(tagFile.tags).toEqual([]);
+      expect(tagFile.oplog_watermark.operation_id).toBe(deleteData.oplog_watermark.operation_id);
+
+      const reloadedApp = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const detailRes = await reloadedApp.request(`/api/runs/${encodeURIComponent(filename)}`);
+      expect(detailRes.status).toBe(200);
+      const detailData = (await detailRes.json()) as {
+        tags: string[];
+        final_state: { lifecycle: string; tags: string[] };
+        oplog_watermark: { ref: string; operation_id?: string; updated_at?: string };
+      };
+      expect(detailData).toMatchObject({
+        tags: [],
+        final_state: {
+          lifecycle: 'active',
+          tags: [],
+        },
+        oplog_watermark: {
+          ref: RUN_OPLOG_REF,
+          operation_id: deleteData.oplog_watermark.operation_id,
+          updated_at: deleteData.oplog_watermark.updated_at,
+        },
       });
     });
 
@@ -2538,6 +2706,296 @@ describe('serve app', () => {
     });
   });
 
+  describe('GET /api/runs/:filename/evals/:evalId/transcript', () => {
+    it('loads canonical transcript JSONL lazily from the manifest pointer', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'with-transcript');
+      const runId = 'with-transcript::2026-03-25T10-00-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T10-00-00-000Z');
+      const transcriptArtifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+      const answerArtifactPath = 'demo/test-greeting/outputs/answer.md';
+      const transcriptPath = path.join(timestampDir, transcriptArtifactPath);
+      const answerPath = path.join(timestampDir, answerArtifactPath);
+      const transcriptJsonl = `${JSON.stringify({
+        test_id: 'test-greeting',
+        target: 'gpt-4o',
+        message_index: 0,
+        role: 'user',
+        content: 'Hello',
+      })}\n`;
+
+      mkdirSync(path.dirname(transcriptPath), { recursive: true });
+      writeFileSync(transcriptPath, transcriptJsonl);
+      writeFileSync(answerPath, 'Hello, Alice!');
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'with-transcript',
+          transcript_path: transcriptArtifactPath,
+          answer_path: answerArtifactPath,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as {
+        status: string;
+        transcript_path: string;
+        content: string;
+        answer_path: string;
+        answer_content: string;
+      };
+      expect(data).toMatchObject({
+        status: 'ok',
+        transcript_path: transcriptArtifactPath,
+        content: transcriptJsonl,
+        answer_path: answerArtifactPath,
+        answer_content: 'Hello, Alice!',
+      });
+    });
+
+    it('loads pointer-shaped transcript metadata when it resolves to a local artifact path', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'pointer-transcript');
+      const runId = 'pointer-transcript::2026-03-25T11-00-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T11-00-00-000Z');
+      const artifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+      const transcriptPath = path.join(timestampDir, artifactPath);
+      const transcriptJsonl = `${JSON.stringify({
+        test_id: 'test-greeting',
+        target: 'gpt-4o',
+        message_index: 0,
+        role: 'assistant',
+        content: 'Hello',
+      })}\n`;
+
+      mkdirSync(path.dirname(transcriptPath), { recursive: true });
+      writeFileSync(transcriptPath, transcriptJsonl);
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'pointer-transcript',
+          artifact_pointers: {
+            transcript: {
+              ref: 'agentv/artifacts/v1',
+              path: artifactPath,
+            },
+          },
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as {
+        status: string;
+        transcript_path: string;
+        content: string;
+        pointer: string;
+      };
+      expect(data.status).toBe('ok');
+      expect(data.transcript_path).toBe(artifactPath);
+      expect(data.content).toBe(transcriptJsonl);
+      expect(data.pointer).toContain('agentv/artifacts/v1');
+    });
+
+    it('returns a clear missing state when no transcript pointer is recorded', async () => {
+      const runId = writeLocalRunArtifact(
+        tempDir,
+        'missing-transcript',
+        '2026-03-25T12-00-00-000Z',
+        RESULT_A,
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as { status: string; message: string };
+      expect(data.status).toBe('missing');
+      expect(data.message).toContain('outputs/transcript.jsonl');
+    });
+
+    it('returns a clear dangling state when the transcript pointer cannot be read', async () => {
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'dangling-transcript');
+      const runId = 'dangling-transcript::2026-03-25T13-00-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T13-00-00-000Z');
+      const artifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+
+      mkdirSync(timestampDir, { recursive: true });
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'dangling-transcript',
+          transcript_path: artifactPath,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as {
+        status: string;
+        transcript_path: string;
+        message: string;
+      };
+      expect(data.status).toBe('dangling');
+      expect(data.transcript_path).toBe(artifactPath);
+      expect(data.message).toContain('not available');
+    });
+
+    it('treats symlinked transcript artifacts outside the run workspace as dangling', async () => {
+      const secret = 'outside transcript secret';
+      const outsidePath = path.join(tempDir, 'outside-transcript.jsonl');
+      writeFileSync(outsidePath, secret);
+
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'escaped-transcript');
+      const runId = 'escaped-transcript::2026-03-25T13-30-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T13-30-00-000Z');
+      const artifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+      const symlinkPath = path.join(timestampDir, artifactPath);
+
+      mkdirSync(path.dirname(symlinkPath), { recursive: true });
+      symlinkSync(outsidePath, symlinkPath);
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'escaped-transcript',
+          transcript_path: artifactPath,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const text = await res.text();
+      expect(text).not.toContain(secret);
+      const data = JSON.parse(text) as { status: string; transcript_path: string };
+      expect(data.status).toBe('dangling');
+      expect(data.transcript_path).toBe(artifactPath);
+    });
+
+    it('omits symlinked answer artifacts outside the run workspace from transcript responses', async () => {
+      const secret = 'outside answer secret';
+      const outsidePath = path.join(tempDir, 'outside-answer.md');
+      writeFileSync(outsidePath, secret);
+
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'escaped-answer');
+      const runId = 'escaped-answer::2026-03-25T13-45-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T13-45-00-000Z');
+      const transcriptArtifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+      const answerArtifactPath = 'demo/test-greeting/outputs/answer.md';
+      const transcriptPath = path.join(timestampDir, transcriptArtifactPath);
+      const answerPath = path.join(timestampDir, answerArtifactPath);
+      const transcriptJsonl = `${JSON.stringify({
+        test_id: 'test-greeting',
+        target: 'gpt-4o',
+        message_index: 0,
+        role: 'user',
+        content: 'Hello',
+      })}\n`;
+
+      mkdirSync(path.dirname(transcriptPath), { recursive: true });
+      writeFileSync(transcriptPath, transcriptJsonl);
+      symlinkSync(outsidePath, answerPath);
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'escaped-answer',
+          transcript_path: transcriptArtifactPath,
+          answer_path: answerArtifactPath,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/transcript`,
+      );
+
+      expect(res.status).toBe(200);
+      const text = await res.text();
+      expect(text).not.toContain(secret);
+      const data = JSON.parse(text) as {
+        status: string;
+        content: string;
+        answer_path: string;
+        answer_content?: string;
+      };
+      expect(data.status).toBe('ok');
+      expect(data.content).toBe(transcriptJsonl);
+      expect(data.answer_path).toBe(answerArtifactPath);
+      expect(data.answer_content).toBeUndefined();
+    });
+
+    it('does not read transcript bodies for list, detail, or aggregate routes', async () => {
+      const timestamp = '2026-03-25T14-00-00-000Z';
+      const transcriptArtifactPath = 'demo/test-greeting/outputs/transcript.jsonl';
+      const runId = writeLocalRunArtifact(tempDir, 'lazy-guard', timestamp, {
+        ...RESULT_A,
+        transcript_path: transcriptArtifactPath,
+      });
+      const timestampDir = path.join(
+        tempDir,
+        '.agentv',
+        'results',
+        'runs',
+        'lazy-guard',
+        timestamp,
+      );
+      mkdirSync(path.join(timestampDir, transcriptArtifactPath), { recursive: true });
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+
+      const listRes = await app.request('/api/runs');
+      expect(listRes.status).toBe(200);
+      const listData = (await listRes.json()) as {
+        runs: Array<{ filename: string; target?: string }>;
+      };
+      expect(listData.runs.find((run) => run.filename === runId)?.target).toBe('gpt-4o');
+
+      const detailRes = await app.request(`/api/runs/${encodeURIComponent(runId)}`);
+      expect(detailRes.status).toBe(200);
+      const detailData = (await detailRes.json()) as { results: unknown[] };
+      expect(detailData.results).toHaveLength(1);
+
+      const compareRes = await app.request('/api/compare');
+      expect(compareRes.status).toBe(200);
+      const compareData = (await compareRes.json()) as {
+        cells: Array<{ experiment: string; eval_count: number }>;
+      };
+      expect(compareData.cells.find((cell) => cell.experiment === 'lazy-guard')?.eval_count).toBe(
+        1,
+      );
+
+      const indexRes = await app.request('/api/index');
+      expect(indexRes.status).toBe(200);
+      const indexData = (await indexRes.json()) as {
+        entries: Array<{ run_filename: string; total_cost_usd: number }>;
+      };
+      expect(indexData.entries.find((entry) => entry.run_filename === runId)?.total_cost_usd).toBe(
+        RESULT_A.cost_usd,
+      );
+    });
+  });
+
   describe('GET /api/runs/:filename/evals/:evalId/files/*', () => {
     it('loads file content for experiment-scoped run ids', async () => {
       const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'with-skills');
@@ -2612,6 +3070,37 @@ describe('serve app', () => {
       );
       expect(await downloadRes.text()).toBe(transcriptJsonl);
     });
+
+    it('rejects symlinked artifact file reads outside the run workspace', async () => {
+      const secret = 'outside raw artifact secret';
+      const outsidePath = path.join(tempDir, 'outside-response.md');
+      writeFileSync(outsidePath, secret);
+
+      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'escaped-file');
+      const runId = 'escaped-file::2026-03-25T10-30-00-000Z';
+      const timestampDir = path.join(runsDir, '2026-03-25T10-30-00-000Z');
+      const artifactPath = 'demo/test-greeting/outputs/response.md';
+      const symlinkPath = path.join(timestampDir, artifactPath);
+
+      mkdirSync(path.dirname(symlinkPath), { recursive: true });
+      symlinkSync(outsidePath, symlinkPath);
+      writeFileSync(
+        path.join(timestampDir, 'index.jsonl'),
+        toJsonl({
+          ...RESULT_A,
+          experiment: 'escaped-file',
+          output_path: artifactPath,
+        }),
+      );
+
+      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
+      const res = await app.request(
+        `/api/runs/${encodeURIComponent(runId)}/evals/test-greeting/files/${artifactPath}?raw=1`,
+      );
+
+      expect(res.status).toBe(403);
+      expect(await res.text()).not.toContain(secret);
+    });
   });
 
   // ── GET /api/compare (tag filter) ───────────────────────────────────
diff --git a/apps/cli/test/commands/results/shared.test.ts b/apps/cli/test/commands/results/shared.test.ts
index 12f64f61e..5e701116e 100644
--- a/apps/cli/test/commands/results/shared.test.ts
+++ b/apps/cli/test/commands/results/shared.test.ts
@@ -79,6 +79,55 @@ describe('results shared source resolution', () => {
     expect(results[0].trace.toolCalls).toEqual({ rg: 1 });
   });
 
+  it('hydrates transcripts from artifact pointers when transcript_path is absent', () => {
+    const runDir = path.join(tempDir, '.agentv', 'results', 'runs', '2026-03-25T10-00-00-000Z');
+    const transcriptRelativePath = 'pointer-case/outputs/transcript.jsonl';
+    mkdirSync(path.join(runDir, 'pointer-case', 'outputs'), { recursive: true });
+    writeFileSync(
+      path.join(runDir, transcriptRelativePath),
+      `${JSON.stringify({
+        schema_version: 'agentv.transcript.v1',
+        test_id: 'pointer-case',
+        target: 'codex',
+        message_index: 0,
+        role: 'assistant',
+        content: 'Loaded from pointer',
+        source: { provider: 'codex', session_id: 'session-pointer' },
+      })}\n`,
+    );
+    const indexPath = path.join(runDir, 'index.jsonl');
+    writeFileSync(
+      indexPath,
+      `${JSON.stringify({
+        timestamp: '2026-03-25T10:00:00.000Z',
+        test_id: 'pointer-case',
+        target: 'codex',
+        score: 1,
+        grading_path: 'pointer-case/grading.json',
+        timing_path: 'pointer-case/timing.json',
+        artifact_pointers: {
+          transcript: {
+            ref: 'agentv/artifacts/v1',
+            key: 'transcripts/pointer-case/outputs/transcript.jsonl',
+            object_version: 'sha256:test',
+            path: transcriptRelativePath,
+            sha256: 'test',
+            size: 1,
+            schema_version: 'agentv.transcript.v1',
+            media_type: 'application/x-ndjson',
+            family: 'transcripts',
+          },
+        },
+      })}\n`,
+    );
+
+    const results = loadManifestResults(indexPath);
+
+    expect(results).toHaveLength(1);
+    expect(results[0].trace.messages[0]?.content).toBe('Loaded from pointer');
+    expect(results[0].trace.messages[0]?.role).toBe('assistant');
+  });
+
   it('rejects eval-case-only rows with migration guidance', () => {
     const runDir = path.join(tempDir, '.agentv', 'results', 'runs', '2026-03-25T10-00-00-000Z');
     mkdirSync(runDir, { recursive: true });
diff --git a/apps/dashboard/src/components/AnalyticsTab.tsx b/apps/dashboard/src/components/AnalyticsTab.tsx
index 4c45c48aa..0378e4d76 100644
--- a/apps/dashboard/src/components/AnalyticsTab.tsx
+++ b/apps/dashboard/src/components/AnalyticsTab.tsx
@@ -15,7 +15,7 @@
  * Backend contract:
  *   - `GET /api/compare`                → { cells, runs? }
  *   - `PUT /api/runs/:runId/tags`       → replaces sidecar tags.json
- *   - `DELETE /api/runs/:runId/tags`    → removes sidecar
+ *   - `DELETE /api/runs/:runId/tags`    → records an empty tag state
  *
  * To extend with a new mode: add a value to `ViewMode`, a button in the mode
  * toggle, and a new body component in the content switch. Hooks in any new
diff --git a/apps/dashboard/src/components/EvalDetail.tsx b/apps/dashboard/src/components/EvalDetail.tsx
index f1369b947..c9c8059a4 100644
--- a/apps/dashboard/src/components/EvalDetail.tsx
+++ b/apps/dashboard/src/components/EvalDetail.tsx
@@ -14,8 +14,10 @@ import {
   isPassing,
   projectEvalFileContentOptions,
   projectEvalFilesOptions,
+  projectEvalTranscriptOptions,
   useEvalFileContent,
   useEvalFiles,
+  useEvalTranscript,
   useStudioConfig,
 } from '~/lib/api';
 import type {
@@ -32,12 +34,7 @@ import type { FileNode } from './FileTree';
 import { FileTree } from './FileTree';
 import { MonacoViewer } from './MonacoViewer';
 import { ScoreBar } from './ScoreBar';
-import {
-  TranscriptTimeline,
-  findAnswerPath,
-  findTranscriptPath,
-  parseTranscriptJsonl,
-} from './TranscriptTimeline';
+import { TranscriptTimeline, parseTranscriptJsonl } from './TranscriptTimeline';
 
 interface EvalDetailProps {
   eval: EvalResult;
@@ -457,49 +454,68 @@ function TranscriptTab({
   onOpenFile: (path: string) => void;
 }) {
   const evalId = result.testId;
-  const { data: filesData, isLoading: isLoadingFiles } = projectId
-    ? useQuery(projectEvalFilesOptions(projectId, runId, evalId))
-    : useEvalFiles(runId, evalId);
-  const files = filesData?.files ?? [];
-  const transcriptPath = findTranscriptPath(files);
-  const answerPath = findAnswerPath(files);
-
-  const { data: transcriptContentData, isLoading: isLoadingTranscript } = projectId
-    ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, transcriptPath ?? ''))
-    : useEvalFileContent(runId, evalId, transcriptPath ?? '');
-  const { data: answerContentData } = projectId
-    ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, answerPath ?? ''))
-    : useEvalFileContent(runId, evalId, answerPath ?? '');
+  const {
+    data: transcriptData,
+    isLoading: isLoadingTranscript,
+    error: transcriptError,
+  } = projectId
+    ? useQuery(projectEvalTranscriptOptions(projectId, runId, evalId))
+    : useEvalTranscript(runId, evalId);
+  const transcriptPath = transcriptData?.transcript_path;
+  const answerPath = transcriptData?.answer_path;
+  const transcriptContent = transcriptData?.status === 'ok' ? (transcriptData.content ?? '') : '';
 
   const parsedTranscript = useMemo(
-    () => parseTranscriptJsonl(transcriptContentData?.content ?? ''),
-    [transcriptContentData?.content],
+    () => parseTranscriptJsonl(transcriptContent),
+    [transcriptContent],
   );
 
-  if (isLoadingFiles) {
+  if (isLoadingTranscript) {
     return (
       <div className="rounded-lg border border-gray-800 bg-gray-900 p-4 text-sm text-gray-500">
-        Loading transcript artifacts...
+        Loading transcript artifact...
       </div>
     );
   }
 
-  if (!transcriptPath) {
+  if (transcriptError) {
+    return (
+      <div className="rounded-lg border border-red-900/50 bg-red-950/20 p-4">
+        <h3 className="text-sm font-medium text-red-300">Transcript could not be loaded</h3>
+        <p className="mt-2 text-sm text-gray-300">{transcriptError.message}</p>
+      </div>
+    );
+  }
+
+  if (!transcriptData || transcriptData.status === 'missing') {
     return (
       <div className="rounded-lg border border-gray-800 bg-gray-900 p-4">
         <h3 className="text-sm font-medium text-gray-300">No structured transcript</h3>
         <p className="mt-2 text-sm text-gray-500">
-          This run does not include canonical <code>outputs/transcript.jsonl</code>. Dashboard does
-          not parse <code>response.md</code> or markdown transcripts for this view.
+          {transcriptData?.message ??
+            'This run does not include canonical outputs/transcript.jsonl. Dashboard does not parse response.md or markdown transcripts for this view.'}
         </p>
       </div>
     );
   }
 
-  if (isLoadingTranscript) {
+  if (transcriptData.status === 'dangling' || transcriptData.status === 'unsupported') {
     return (
-      <div className="rounded-lg border border-gray-800 bg-gray-900 p-4 text-sm text-gray-500">
-        Loading <code>{transcriptPath}</code>...
+      <div className="rounded-lg border border-amber-900/50 bg-amber-950/20 p-4">
+        <h3 className="text-sm font-medium text-amber-300">
+          {transcriptData.status === 'dangling'
+            ? 'Transcript artifact unavailable'
+            : 'Transcript pointer unsupported'}
+        </h3>
+        <p className="mt-2 text-sm text-gray-300">
+          {transcriptData.message ?? 'The transcript artifact could not be resolved.'}
+        </p>
+        {transcriptPath ? (
+          <p className="mt-2 font-mono text-xs text-gray-500">{transcriptPath}</p>
+        ) : null}
+        {transcriptData.pointer ? (
+          <p className="mt-2 font-mono text-xs text-gray-500">{transcriptData.pointer}</p>
+        ) : null}
       </div>
     );
   }
@@ -510,27 +526,31 @@ function TranscriptTab({
         <h3 className="text-sm font-medium text-red-300">Transcript could not be parsed</h3>
         <p className="mt-2 text-sm text-gray-300">{parsedTranscript.error}</p>
         <div className="mt-3 flex flex-wrap gap-2">
-          <button
-            type="button"
-            onClick={() => onOpenFile(transcriptPath)}
-            className="rounded-md border border-gray-700 px-3 py-1.5 text-sm text-gray-300 transition-colors hover:border-cyan-900/60 hover:text-cyan-300"
-          >
-            Open raw JSONL in Files
-          </button>
-          <a
-            href={artifactFileContentUrl({
-              projectId,
-              runId,
-              evalId,
-              filePath: transcriptPath,
-              raw: true,
-            })}
-            target="_blank"
-            rel="noreferrer"
-            className="rounded-md px-3 py-1.5 text-sm text-cyan-400 transition-colors hover:text-cyan-300 hover:underline"
-          >
-            Open raw JSONL
-          </a>
+          {transcriptPath ? (
+            <>
+              <button
+                type="button"
+                onClick={() => onOpenFile(transcriptPath)}
+                className="rounded-md border border-gray-700 px-3 py-1.5 text-sm text-gray-300 transition-colors hover:border-cyan-900/60 hover:text-cyan-300"
+              >
+                Open raw JSONL in Files
+              </button>
+              <a
+                href={artifactFileContentUrl({
+                  projectId,
+                  runId,
+                  evalId,
+                  filePath: transcriptPath,
+                  raw: true,
+                })}
+                target="_blank"
+                rel="noreferrer"
+                className="rounded-md px-3 py-1.5 text-sm text-cyan-400 transition-colors hover:text-cyan-300 hover:underline"
+              >
+                Open raw JSONL
+              </a>
+            </>
+          ) : null}
         </div>
       </div>
     );
@@ -550,25 +570,29 @@ function TranscriptTab({
   const answerHref = answerPath
     ? artifactFileContentUrl({ projectId, runId, evalId, filePath: answerPath, raw: true })
     : undefined;
-  const transcriptHref = artifactFileContentUrl({
-    projectId,
-    runId,
-    evalId,
-    filePath: transcriptPath,
-    raw: true,
-  });
-  const transcriptDownloadHref = artifactFileContentUrl({
-    projectId,
-    runId,
-    evalId,
-    filePath: transcriptPath,
-    download: true,
-  });
+  const transcriptHref = transcriptPath
+    ? artifactFileContentUrl({
+        projectId,
+        runId,
+        evalId,
+        filePath: transcriptPath,
+        raw: true,
+      })
+    : undefined;
+  const transcriptDownloadHref = transcriptPath
+    ? artifactFileContentUrl({
+        projectId,
+        runId,
+        evalId,
+        filePath: transcriptPath,
+        download: true,
+      })
+    : undefined;
 
   return (
     <TranscriptTimeline
       entries={parsedTranscript.entries}
-      finalAnswer={answerPath ? (answerContentData?.content ?? result.output) : undefined}
+      finalAnswer={answerPath ? (transcriptData.answer_content ?? result.output) : undefined}
       answerPath={answerPath}
       transcriptPath={transcriptPath}
       answerHref={answerHref}
diff --git a/apps/dashboard/src/lib/__fixtures__/trace-session-read-model.ts b/apps/dashboard/src/lib/__fixtures__/trace-session-read-model.ts
new file mode 100644
index 000000000..631549774
--- /dev/null
+++ b/apps/dashboard/src/lib/__fixtures__/trace-session-read-model.ts
@@ -0,0 +1,170 @@
+export const traceSessionEnvelopeFixture = {
+  schema_version: 'agentv.trace.v1',
+  artifact_id: 'execution-trace-fixture',
+  created_at: '2026-06-21T10:00:00.000Z',
+  eval: {
+    run_id: '2026-06-21T10-00-00-000Z',
+    test_id: 'nested-session',
+    suite: 'evals/github-backed.eval.yaml',
+    target: 'codex',
+  },
+  trace: {
+    format: 'otlp_openinference_spans',
+    trace_id: 'trace-123',
+    root_span_id: 'root-span',
+    spans: [
+      {
+        trace_id: 'trace-123',
+        span_id: 'root-span',
+        parent_span_id: null,
+        name: 'invoke_agent codex',
+        kind: 'INTERNAL',
+        start_time_unix_nano: '1000000000',
+        end_time_unix_nano: '2500000000',
+        status: { code: 'OK' },
+        attributes: {
+          'agentv.test_id': 'nested-session',
+          'agentv.target': 'codex',
+          'custom.unknown_value': { nested_value: true },
+          external_trace_url:
+            'https://phoenix.example/projects/agentv-dogfood/traces/phoenix-trace-456?api_key=secret',
+          external_trace_token: 'secret-span-token',
+          access_token: 'secret-access-token',
+          'gen_ai.usage.input_tokens': 14,
+          'gen_ai.usage.output_tokens': 9,
+        },
+        events: [
+          {
+            name: 'agentv.annotation',
+            time_unix_nano: '1200000000',
+            attributes: {
+              event_id: 'annotation-1',
+              text: 'Reviewer note',
+              passed: true,
+              extra_context: { source: 'grader' },
+              authorization: 'Bearer secret',
+              nested: { password: 'secret', safe_value: 'visible' },
+            },
+          },
+          {
+            name: 'agentv.score',
+            time_unix_nano: '2300000000',
+            attributes: {
+              event_id: 'score-1',
+              score: 0.82,
+              text: 'Rubric score',
+              passed: true,
+            },
+          },
+        ],
+      },
+      {
+        trace_id: 'trace-123',
+        span_id: 'child-chat',
+        parent_span_id: 'root-span',
+        name: 'chat gpt-5-codex',
+        kind: 'INTERNAL',
+        start_time_unix_nano: '1300000000',
+        end_time_unix_nano: '2200000000',
+        status: { code: 'OK' },
+        attributes: {
+          'gen_ai.operation.name': 'chat',
+          'openinference.span.kind': 'LLM',
+        },
+        events: [],
+      },
+      {
+        trace_id: 'trace-123',
+        span_id: 'grandchild-tool',
+        parent_span_id: 'child-chat',
+        name: 'execute_tool read_file',
+        kind: 'INTERNAL',
+        start_time_unix_nano: '1500000000',
+        end_time_unix_nano: '1700000000',
+        status: { code: 'OK' },
+        attributes: {
+          'gen_ai.tool.name': 'read_file',
+          'tool.name': 'read_file',
+        },
+        events: [],
+      },
+    ],
+  },
+  source: {
+    kind: 'agentv_run',
+    path: 'index.jsonl',
+    provider: 'codex',
+    format: 'agentv_result',
+    version: '1',
+    metadata: {
+      external_trace: {
+        provider: 'phoenix',
+        project: 'agentv-dogfood',
+        session_id: 'codex-session-123',
+        trace_id: 'phoenix-trace-456',
+        url: 'https://phoenix.example/projects/agentv-dogfood/traces/phoenix-trace-456?api_key=secret',
+        api_key: 'secret',
+      },
+      safe_note: 'local artifact remains canonical',
+      access_token: 'secret',
+    },
+  },
+  scores: [
+    {
+      name: 'rubric',
+      type: 'llm-grader',
+      score: 0.82,
+      weight: 1,
+      verdict: 'pass',
+      source: 'llm',
+      evaluated_at: '2026-06-21T10:00:02.300Z',
+      target_span_id: 'root-span',
+      evidence: {
+        assertions: [{ text: 'Rubric score', passed: true }],
+      },
+    },
+  ],
+};
+
+export const traceSessionMissingOptionalFixture = {
+  schema_version: 'agentv.trace.v1',
+  artifact_id: 'execution-trace-missing-optionals',
+  created_at: '2026-06-21T10:05:00.000Z',
+  eval: {
+    run_id: '2026-06-21T10-05-00-000Z',
+    test_id: 'missing-optionals',
+    target: 'codex',
+  },
+  trace: {
+    format: 'otlp_openinference_spans',
+    trace_id: 'trace-missing',
+    root_span_id: 'root-missing',
+    spans: [
+      {
+        trace_id: 'trace-missing',
+        span_id: 'root-missing',
+        parent_span_id: null,
+        name: 'invoke_agent codex',
+        kind: 'INTERNAL',
+        status: { code: 'OK' },
+        attributes: {
+          'agentv.test_id': 'missing-optionals',
+        },
+        events: [],
+      },
+    ],
+  },
+  source: {
+    kind: 'agentv_run',
+    path: 'index.jsonl',
+    provider: 'codex',
+    metadata: {
+      external_trace: {
+        provider: 'codex',
+        session_id: 'codex-session-789',
+        url: 'not-a-url',
+        token: 'secret',
+      },
+    },
+  },
+};
diff --git a/apps/dashboard/src/lib/api.ts b/apps/dashboard/src/lib/api.ts
index 467c3b216..869e1e81d 100644
--- a/apps/dashboard/src/lib/api.ts
+++ b/apps/dashboard/src/lib/api.ts
@@ -43,6 +43,7 @@ import type {
   StudioConfigResponse,
   SuitesResponse,
   TargetsResponse,
+  TranscriptArtifactResponse,
 } from './types';
 
 async function fetchJson<T>(url: string): Promise<T> {
@@ -231,6 +232,17 @@ export function evalFileContentOptions(runId: string, evalId: string, filePath:
   });
 }
 
+export function evalTranscriptOptions(runId: string, evalId: string) {
+  return queryOptions({
+    queryKey: ['runs', runId, 'evals', evalId, 'transcript'],
+    queryFn: () =>
+      fetchJson<TranscriptArtifactResponse>(
+        `/api/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/transcript`,
+      ),
+    enabled: !!runId && !!evalId,
+  });
+}
+
 export function runCategoriesOptions(runId: string) {
   return queryOptions({
     queryKey: ['runs', runId, 'categories'],
@@ -321,6 +333,10 @@ export function useEvalFileContent(runId: string, evalId: string, filePath: stri
   return useQuery(evalFileContentOptions(runId, evalId, filePath));
 }
 
+export function useEvalTranscript(runId: string, evalId: string) {
+  return useQuery(evalTranscriptOptions(runId, evalId));
+}
+
 export function useRunCategories(runId: string) {
   return useQuery(runCategoriesOptions(runId));
 }
@@ -553,6 +569,17 @@ export function projectEvalFileContentOptions(
   });
 }
 
+export function projectEvalTranscriptOptions(projectId: string, runId: string, evalId: string) {
+  return queryOptions({
+    queryKey: ['projects', projectId, 'runs', runId, 'evals', evalId, 'transcript'],
+    queryFn: () =>
+      fetchJson<TranscriptArtifactResponse>(
+        `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/evals/${encodeURIComponent(evalId)}/transcript`,
+      ),
+    enabled: !!projectId && !!runId && !!evalId,
+  });
+}
+
 export function projectExperimentsOptions(projectId: string) {
   return queryOptions({
     queryKey: ['projects', projectId, 'experiments'],
@@ -665,7 +692,7 @@ export async function deleteRunApi(runId: string, projectId?: string): Promise<v
 /**
  * Replace the tags on a run. Tags are stored as a sidecar `tags.json` file
  * next to the run's manifest and surface as chips in the compare views.
- * Pass an empty array to clear all tags (server deletes the sidecar).
+ * Pass an empty array to clear all tags while preserving the clear watermark.
  */
 export async function saveRunTagsApi(
   runId: string,
@@ -687,7 +714,7 @@ export async function saveRunTagsApi(
   return res.json() as Promise<RunTagsResponse>;
 }
 
-/** Remove the tags sidecar for a run. */
+/** Clear the tags for a run while preserving the clear watermark. */
 export async function deleteRunTagsApi(runId: string, projectId?: string): Promise<void> {
   const url = projectId
     ? `${projectApiBase(projectId)}/runs/${encodeURIComponent(runId)}/tags`
diff --git a/apps/dashboard/src/lib/trace-read-model.test.ts b/apps/dashboard/src/lib/trace-read-model.test.ts
new file mode 100644
index 000000000..68127da36
--- /dev/null
+++ b/apps/dashboard/src/lib/trace-read-model.test.ts
@@ -0,0 +1,295 @@
+import { describe, expect, it } from 'bun:test';
+
+import {
+  traceSessionEnvelopeFixture,
+  traceSessionMissingOptionalFixture,
+} from './__fixtures__/trace-session-read-model';
+import {
+  type TraceSpanNode,
+  buildTraceSpanTree,
+  traceEnvelopeToTraceSessionResponse,
+} from './trace-read-model';
+
+function expectSnakeCaseFixtureKeys(value: unknown, path: string[] = []): void {
+  if (Array.isArray(value)) {
+    value.forEach((entry, index) => expectSnakeCaseFixtureKeys(entry, [...path, String(index)]));
+    return;
+  }
+  if (!value || typeof value !== 'object') {
+    return;
+  }
+
+  for (const [key, entry] of Object.entries(value)) {
+    const parentKey = path.at(-1);
+    if (parentKey !== 'attributes') {
+      expect(key, [...path, key].join('.')).toMatch(/^[a-z][a-z0-9_]*$/);
+    }
+    expectSnakeCaseFixtureKeys(entry, [...path, key]);
+  }
+}
+
+function flattenTree(nodes: readonly TraceSpanNode[]): TraceSpanNode[] {
+  return nodes.flatMap((node) => [node, ...flattenTree(node.children)]);
+}
+
+describe('trace session read model', () => {
+  it('projects snake_case trace artifacts into stable Dashboard span trees', () => {
+    const session = traceEnvelopeToTraceSessionResponse(traceSessionEnvelopeFixture, {
+      artifactPath: 'nested-session__codex/outputs/trace.json',
+    });
+    const tree = buildTraceSpanTree(session.spans);
+
+    expect(session).toMatchObject({
+      schema_version: 'agentv.dashboard.trace_session.v1',
+      run_id: '2026-06-21T10-00-00-000Z',
+      test_id: 'nested-session',
+      target: 'codex',
+      trace_id: 'trace-123',
+      root_span_id: 'root-span',
+      source: {
+        artifact_path: 'nested-session__codex/outputs/trace.json',
+      },
+    });
+    expect(session.spans.map((span) => span.id)).toEqual([
+      'root-span',
+      'child-chat',
+      'grandchild-tool',
+    ]);
+    expect(session.spans.map((span) => span.parent_span_id)).toEqual([
+      null,
+      'root-span',
+      'child-chat',
+    ]);
+    expect(tree).toHaveLength(1);
+    expect(tree[0].spanId).toBe('root-span');
+    expect(tree[0].children[0].spanId).toBe('child-chat');
+    expect(tree[0].children[0].children[0].spanId).toBe('grandchild-tool');
+  });
+
+  it('preserves score events, annotation events, scores, and unknown attributes', () => {
+    const session = traceEnvelopeToTraceSessionResponse(traceSessionEnvelopeFixture);
+    const root = session.spans.find((span) => span.span_id === 'root-span');
+
+    expect(root?.duration_ms).toBe(1500);
+    expect(root?.token_usage).toEqual({ input: 14, output: 9 });
+    expect(root?.attributes?.['custom.unknown_value']).toEqual({ nested_value: true });
+    expect(root?.attributes?.['gen_ai.usage.input_tokens']).toBe(14);
+    expect(root?.attributes).not.toHaveProperty('external_trace_url');
+    expect(root?.attributes).not.toHaveProperty('external_trace_token');
+    expect(root?.attributes).not.toHaveProperty('access_token');
+
+    expect(session.events.map((event) => [event.event_id, event.kind, event.name])).toEqual([
+      ['annotation-1', 'annotation', 'agentv.annotation'],
+      ['score-1', 'score', 'agentv.score'],
+    ]);
+    expect(session.events[0]).toMatchObject({
+      text: 'Reviewer note',
+      passed: true,
+      attributes: { extra_context: { source: 'grader' }, nested: { safe_value: 'visible' } },
+    });
+    expect(session.events[1]).toMatchObject({
+      score: 0.82,
+      text: 'Rubric score',
+      passed: true,
+    });
+    expect(session.scores).toEqual([
+      {
+        name: 'rubric',
+        type: 'llm-grader',
+        score: 0.82,
+        weight: 1,
+        verdict: 'pass',
+        source: 'llm',
+        evaluated_at: '2026-06-21T10:00:02.300Z',
+        target_span_id: 'root-span',
+        evidence: {
+          assertions: [{ text: 'Rubric score', passed: true }],
+        },
+      },
+    ]);
+  });
+
+  it('keeps external_trace links safe and leaves AgentV as canonical source', () => {
+    const session = traceEnvelopeToTraceSessionResponse(traceSessionEnvelopeFixture);
+
+    expect(session.external_trace).toEqual({
+      provider: 'phoenix',
+      project: 'agentv-dogfood',
+      session_id: 'codex-session-123',
+      trace_id: 'phoenix-trace-456',
+      url: 'https://phoenix.example/projects/agentv-dogfood/traces/phoenix-trace-456',
+    });
+    expect(JSON.stringify(session.external_trace)).not.toContain('secret');
+    expect(JSON.stringify(session.external_trace)).not.toContain('api_key');
+    expect(JSON.stringify(session)).not.toContain('secret');
+    expect(JSON.stringify(session)).not.toContain('api_key');
+    expect(session.source?.metadata).toEqual({
+      safe_note: 'local artifact remains canonical',
+    });
+  });
+
+  it('does not invent zero timing, token usage, or broken external links for missing fields', () => {
+    const session = traceEnvelopeToTraceSessionResponse(traceSessionMissingOptionalFixture);
+    const root = session.spans[0];
+
+    expect(root.start_time_unix_nano).toBeUndefined();
+    expect(root.end_time_unix_nano).toBeUndefined();
+    expect(root.start_time).toBeUndefined();
+    expect(root.end_time).toBeUndefined();
+    expect(root.duration_ms).toBeUndefined();
+    expect(root.token_usage).toBeUndefined();
+    expect(session.external_trace).toEqual({
+      provider: 'codex',
+      session_id: 'codex-session-789',
+    });
+    expect(JSON.stringify(session.external_trace)).not.toContain('secret');
+    expect(JSON.stringify(session.external_trace)).not.toContain('not-a-url');
+  });
+
+  it('preserves duplicate span IDs with collision-free node IDs and diagnostics', () => {
+    const tree = buildTraceSpanTree([
+      {
+        id: 'root',
+        span_id: 'root',
+        parent_span_id: null,
+        name: 'root',
+        start_time_unix_nano: '1000',
+      },
+      {
+        id: 'dup',
+        span_id: 'dup',
+        parent_span_id: 'root',
+        name: 'first duplicate',
+        start_time_unix_nano: '1100',
+      },
+      {
+        id: 'dup',
+        span_id: 'dup',
+        parent_span_id: 'root',
+        name: 'second duplicate',
+        start_time_unix_nano: '1200',
+      },
+    ]);
+    const nodes = flattenTree(tree);
+
+    expect(nodes.map((node) => node.id)).toEqual(['root', 'dup', 'dup#2']);
+    expect(nodes.map((node) => node.span.name)).toEqual([
+      'root',
+      'first duplicate',
+      'second duplicate',
+    ]);
+    expect(nodes[2].diagnostics?.map((diagnostic) => diagnostic.code)).toEqual([
+      'duplicate_span_id',
+    ]);
+  });
+
+  it('promotes self-parented spans and ancestor cycles to diagnostic roots', () => {
+    const tree = buildTraceSpanTree([
+      {
+        id: 'self',
+        span_id: 'self',
+        parent_span_id: 'self',
+        name: 'self',
+        start_time_unix_nano: '3000',
+      },
+      {
+        id: 'cycle-a',
+        span_id: 'cycle-a',
+        parent_span_id: 'cycle-b',
+        name: 'cycle-a',
+        start_time_unix_nano: '1000',
+      },
+      {
+        id: 'cycle-b',
+        span_id: 'cycle-b',
+        parent_span_id: 'cycle-a',
+        name: 'cycle-b',
+        start_time_unix_nano: '2000',
+      },
+    ]);
+    const nodes = flattenTree(tree);
+
+    expect(tree.map((node) => node.spanId)).toEqual(['cycle-a', 'cycle-b', 'self']);
+    expect(nodes.every((node) => node.children.length === 0)).toBe(true);
+    expect(nodes.map((node) => node.diagnostics?.[0]?.code)).toEqual([
+      'cycle',
+      'cycle',
+      'self_parent',
+    ]);
+  });
+
+  it('keeps missing-ID and missing-parent spans as diagnostic roots', () => {
+    const tree = buildTraceSpanTree([
+      {
+        id: '',
+        span_id: '',
+        parent_span_id: null,
+        name: 'missing id',
+      },
+      {
+        id: 'orphan',
+        span_id: 'orphan',
+        parent_span_id: 'missing-parent',
+        name: 'orphan',
+      },
+    ]);
+
+    expect(tree.map((node) => node.id)).toEqual(['missing-span-0', 'orphan']);
+    expect(tree.map((node) => node.diagnostics?.[0]?.code)).toEqual([
+      'missing_span_id',
+      'missing_parent',
+    ]);
+  });
+
+  it('sorts roots and children by start time with stable span ID tie breaks', () => {
+    const tree = buildTraceSpanTree([
+      {
+        id: 'root-b',
+        span_id: 'root-b',
+        parent_span_id: null,
+        name: 'root-b',
+        start_time_unix_nano: '2000',
+      },
+      {
+        id: 'child-late',
+        span_id: 'child-late',
+        parent_span_id: 'root-a',
+        name: 'child-late',
+        start_time_unix_nano: '1200',
+      },
+      {
+        id: 'root-a',
+        span_id: 'root-a',
+        parent_span_id: null,
+        name: 'root-a',
+        start_time_unix_nano: '1000',
+      },
+      {
+        id: 'child-early',
+        span_id: 'child-early',
+        parent_span_id: 'root-a',
+        name: 'child-early',
+        start_time_unix_nano: '1100',
+      },
+      {
+        id: 'child-alpha',
+        span_id: 'child-alpha',
+        parent_span_id: 'root-a',
+        name: 'child-alpha',
+        start_time_unix_nano: '1200',
+      },
+    ]);
+
+    expect(tree.map((node) => node.spanId)).toEqual(['root-a', 'root-b']);
+    expect(tree[0].children.map((node) => node.spanId)).toEqual([
+      'child-early',
+      'child-alpha',
+      'child-late',
+    ]);
+  });
+
+  it('keeps new API fixtures snake_case-only outside opaque attributes maps', () => {
+    expectSnakeCaseFixtureKeys(traceSessionEnvelopeFixture);
+    expectSnakeCaseFixtureKeys(traceSessionMissingOptionalFixture);
+  });
+});
diff --git a/apps/dashboard/src/lib/trace-read-model.ts b/apps/dashboard/src/lib/trace-read-model.ts
new file mode 100644
index 000000000..b14fa4c2b
--- /dev/null
+++ b/apps/dashboard/src/lib/trace-read-model.ts
@@ -0,0 +1,696 @@
+import type {
+  ExternalTraceMetadata,
+  TraceSessionEvent,
+  TraceSessionEventKind,
+  TraceSessionResponse,
+  TraceSessionScore,
+  TraceSessionSource,
+  TraceSessionSpan,
+  TraceSessionTokenUsage,
+} from './types';
+
+export const TRACE_SESSION_SCHEMA_VERSION = 'agentv.dashboard.trace_session.v1' as const;
+
+export interface TraceSessionProjectionOptions {
+  runId?: string;
+  artifactPath?: string;
+}
+
+export interface TraceSpanNode {
+  id: string;
+  spanId: string;
+  parentSpanId?: string | null;
+  span: TraceSessionSpan;
+  children: TraceSpanNode[];
+  diagnostics?: TraceSpanTreeDiagnostic[];
+}
+
+export type TraceSpanTreeDiagnosticCode =
+  | 'cycle'
+  | 'duplicate_span_id'
+  | 'missing_parent'
+  | 'missing_span_id'
+  | 'self_parent';
+
+export interface TraceSpanTreeDiagnostic {
+  code: TraceSpanTreeDiagnosticCode;
+  message: string;
+  span_id?: string;
+  node_id?: string;
+  parent_span_id?: string;
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function asRecord(value: unknown): Record<string, unknown> | undefined {
+  return isRecord(value) ? value : undefined;
+}
+
+function asArray(value: unknown): unknown[] {
+  return Array.isArray(value) ? value : [];
+}
+
+function stringValue(value: unknown): string | undefined {
+  return typeof value === 'string' && value.length > 0 ? value : undefined;
+}
+
+function finiteNumber(value: unknown): number | undefined {
+  return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
+}
+
+function boolValue(value: unknown): boolean | undefined {
+  return typeof value === 'boolean' ? value : undefined;
+}
+
+function dropUndefined<T extends Record<string, unknown>>(value: T): T {
+  return Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== undefined)) as T;
+}
+
+function compactRecord(value: Record<string, unknown>): Record<string, unknown> | undefined {
+  const compacted = dropUndefined(value);
+  return Object.keys(compacted).length > 0 ? compacted : undefined;
+}
+
+function nonEmptyArray<T>(value: readonly T[] | undefined): readonly T[] | undefined {
+  return value && value.length > 0 ? value : undefined;
+}
+
+// Decimal unix-nanosecond text -> ISO-8601 string; undefined when empty, unparsable, or out of range.
+function unixNanoToIso(value: string | undefined): string | undefined {
+  if (!value) return undefined;
+  try {
+    const epochMillis = BigInt(value) / 1_000_000n; // truncates sub-millisecond digits
+    return new Date(Number(epochMillis)).toISOString();
+  } catch {
+    return undefined;
+  }
+}
+
+/**
+ * Elapsed milliseconds between two decimal unix-nanosecond strings.
+ * Yields undefined when a bound is missing or not integer text, or when end precedes start.
+ */
+function durationMsFromNanos(
+  start: string | undefined,
+  end: string | undefined,
+): number | undefined {
+  if (!start || !end) return undefined;
+  try {
+    const beginNanos = BigInt(start);
+    const elapsedNanos = BigInt(end) - beginNanos;
+    // A negative span means the timestamps are inconsistent, so no duration is reported.
+    return elapsedNanos < 0n ? undefined : Number(elapsedNanos) / 1_000_000;
+  } catch {
+    return undefined;
+  }
+}
+
+function numberFromAttributes(
+  attributes: Record<string, unknown>,
+  keys: readonly string[],
+): number | undefined {
+  for (const key of keys) {
+    const value = finiteNumber(attributes[key]);
+    if (value !== undefined) {
+      return value;
+    }
+  }
+  return undefined;
+}
+
+function tokenUsageFromAttributes(
+  attributes: Record<string, unknown> | undefined,
+): TraceSessionTokenUsage | undefined {
+  if (!attributes) {
+    return undefined;
+  }
+
+  const nested = asRecord(attributes.token_usage);
+  const usage = compactRecord({
+    input:
+      finiteNumber(nested?.input) ??
+      numberFromAttributes(attributes, [
+        'gen_ai.usage.input_tokens',
+        'llm.token_count.prompt',
+        'input_tokens',
+      ]),
+    output:
+      finiteNumber(nested?.output) ??
+      numberFromAttributes(attributes, [
+        'gen_ai.usage.output_tokens',
+        'llm.token_count.completion',
+        'output_tokens',
+      ]),
+    reasoning:
+      finiteNumber(nested?.reasoning) ??
+      numberFromAttributes(attributes, [
+        'gen_ai.usage.reasoning.output_tokens',
+        'reasoning_tokens',
+      ]),
+    cached:
+      finiteNumber(nested?.cached) ??
+      numberFromAttributes(attributes, ['gen_ai.usage.cache_read.input_tokens', 'cached_tokens']),
+    total: finiteNumber(nested?.total) ?? numberFromAttributes(attributes, ['total_tokens']),
+  });
+
+  return usage as TraceSessionTokenUsage | undefined;
+}
+
+// Matches the `external_trace` key itself plus any key namespaced beneath it
+// with an underscore (`external_trace_url`) or a dot (`external_trace.url`).
+function isExternalTraceKey(key: string): boolean {
+  if (key === 'external_trace') return true;
+  const namespacedPrefixes = ['external_trace_', 'external_trace.'];
+  return namespacedPrefixes.some((prefix) => key.startsWith(prefix));
+}
+
+// Flags credential-style attribute keys; token *counters* (e.g. llm.token_count.prompt) are exempt.
+function isCredentialLikeKey(key: string): boolean {
+  const normalized = key.toLowerCase();
+  const isTokenCounterKey =
+    normalized === 'token_usage' ||
+    normalized.includes('token_count') ||
+    normalized.endsWith('_tokens') ||
+    normalized.endsWith('.tokens') ||
+    normalized.includes('usage.');
+  if (isTokenCounterKey) return false;
+  return /(^|[._-])(api[._-]?key|authorization|bearer|password|secret|private[._-]?key|access[._-]?token|auth[._-]?token|client[._-]?secret|id[._-]?token|refresh[._-]?token|session[._-]?token|token)($|[._-])/.test(
+    normalized,
+  );
+}
+
+function sanitizeAttributeMap(
+  value: Record<string, unknown> | undefined,
+): Record<string, unknown> | undefined {
+  if (!value) {
+    return undefined;
+  }
+  const entries = Object.entries(value).flatMap(([key, entry]) => {
+    if (isExternalTraceKey(key) || isCredentialLikeKey(key)) {
+      return [];
+    }
+    if (isRecord(entry)) {
+      const nested = sanitizeAttributeMap(entry);
+      return nested ? [[key, nested] as const] : [];
+    }
+    return [[key, entry] as const];
+  });
+  return entries.length > 0 ? Object.fromEntries(entries) : undefined;
+}
+
+function spanStatusFromValue(value: unknown): TraceSessionSpan['status'] {
+  const record = asRecord(value);
+  if (!record) {
+    return undefined;
+  }
+  return compactRecord({
+    code:
+      stringValue(record.code) ??
+      (typeof record.code === 'number' ? String(record.code) : undefined),
+    message: stringValue(record.message),
+  }) as TraceSessionSpan['status'];
+}
+
+/**
+ * Classifies a span event as `score`, `annotation`, `exception`, or `event`.
+ *
+ * Checks run in that order and the first match wins: a score is recognised by
+ * name or by a finite numeric score attribute, an annotation by name or by a
+ * text attribute, and an exception only by the exact name `exception`.
+ */
+function eventKind(
+  name: string,
+  attributes: Record<string, unknown> | undefined,
+): TraceSessionEventKind {
+  const lowered = name.toLowerCase();
+
+  const looksLikeScore =
+    lowered.includes('score') ||
+    ['score', 'agentv.score', 'agentv.grader.score'].some(
+      (key) => finiteNumber(attributes?.[key]) !== undefined,
+    );
+  if (looksLikeScore) {
+    return 'score';
+  }
+
+  const looksLikeAnnotation =
+    lowered.includes('annotation') ||
+    ['text', 'annotation', 'agentv.annotation.text'].some(
+      (key) => stringValue(attributes?.[key]) !== undefined,
+    );
+  if (looksLikeAnnotation) {
+    return 'annotation';
+  }
+
+  return lowered === 'exception' ? 'exception' : 'event';
+}
+
+/**
+ * Reads an event's score from the first attribute that yields a value, trying
+ * `score`, `agentv.score`, then `agentv.grader.score`.
+ */
+function scoreFromEvent(attributes: Record<string, unknown> | undefined): number | undefined {
+  if (!attributes) {
+    return undefined;
+  }
+  const candidateKeys = ['score', 'agentv.score', 'agentv.grader.score'];
+  // `??` short-circuits, so later keys are only consulted while nothing was found.
+  return candidateKeys.reduce<number | undefined>(
+    (found, key) => found ?? finiteNumber(attributes[key]),
+    undefined,
+  );
+}
+
+/**
+ * Reads an event's text from the first attribute that yields a value, trying
+ * `text`, `annotation`, `agentv.annotation.text`, then `exception.message`.
+ */
+function textFromEvent(attributes: Record<string, unknown> | undefined): string | undefined {
+  if (!attributes) {
+    return undefined;
+  }
+  const candidateKeys = ['text', 'annotation', 'agentv.annotation.text', 'exception.message'];
+  // `??` short-circuits, so later keys are only consulted while nothing was found.
+  return candidateKeys.reduce<string | undefined>(
+    (found, key) => found ?? stringValue(attributes[key]),
+    undefined,
+  );
+}
+
+/**
+ * Reads an event's pass/fail flag from `passed`, falling back to
+ * `agentv.annotation.passed`.
+ */
+function passedFromEvent(attributes: Record<string, unknown> | undefined): boolean | undefined {
+  if (!attributes) {
+    return undefined;
+  }
+  const direct = boolValue(attributes.passed);
+  return direct ?? boolValue(attributes['agentv.annotation.passed']);
+}
+
+/**
+ * Resolves an event id: an explicit `event_id` or `agentv.event_id` attribute
+ * when present, otherwise a positional id of the form `<spanId>:event:<index>`.
+ */
+function eventId(
+  spanId: string,
+  index: number,
+  attributes: Record<string, unknown> | undefined,
+): string {
+  const explicitId =
+    stringValue(attributes?.event_id) ?? stringValue(attributes?.['agentv.event_id']);
+  return explicitId ?? `${spanId}:event:${index}`;
+}
+
+/**
+ * Projects one raw span event into a `TraceSessionEvent`.
+ *
+ * The id, kind, score, text, and passed fields are derived from the raw
+ * attributes; only the `attributes` field of the result is sanitized.
+ *
+ * @param spanId Id of the owning span, also used for the fallback event id.
+ * @param event Raw event value from the envelope.
+ * @param index Position of the event within its span.
+ * @returns The projected event, or `undefined` when `event` is not a record
+ *   or has no usable `name`.
+ */
+function projectSpanEvent(
+  spanId: string,
+  event: unknown,
+  index: number,
+): TraceSessionEvent | undefined {
+  const record = asRecord(event);
+  const name = record ? stringValue(record.name) : undefined;
+  if (!record || !name) {
+    return undefined;
+  }
+
+  const attributes = asRecord(record.attributes);
+  const timeUnixNano = stringValue(record.time_unix_nano);
+
+  return dropUndefined({
+    event_id: eventId(spanId, index, attributes),
+    span_id: spanId,
+    name,
+    kind: eventKind(name, attributes),
+    time_unix_nano: timeUnixNano,
+    timestamp: unixNanoToIso(timeUnixNano),
+    score: scoreFromEvent(attributes),
+    text: textFromEvent(attributes),
+    passed: passedFromEvent(attributes),
+    attributes: sanitizeAttributeMap(attributes),
+  });
+}
+
+/**
+ * Projects one raw envelope span into a `TraceSessionSpan`.
+ *
+ * A span without a usable `span_id` is given the positional id `span-<index>`,
+ * and a span without a name is named after its id. Token usage is read from
+ * the raw attributes, while the `attributes` field itself is sanitized.
+ *
+ * @param span Raw span value from the envelope.
+ * @param index Position of the span in the envelope's span list.
+ * @returns The projected span, or `undefined` when `span` is not a record.
+ */
+function projectSpan(span: unknown, index: number): TraceSessionSpan | undefined {
+  const record = asRecord(span);
+  if (!record) {
+    return undefined;
+  }
+
+  const spanId = stringValue(record.span_id) ?? `span-${index}`;
+  const attributes = asRecord(record.attributes);
+  const startNanos = stringValue(record.start_time_unix_nano);
+  const endNanos = stringValue(record.end_time_unix_nano);
+
+  const events: TraceSessionEvent[] = [];
+  asArray(record.events).forEach((rawEvent, eventIndex) => {
+    const projected = projectSpanEvent(spanId, rawEvent, eventIndex);
+    if (projected !== undefined) {
+      events.push(projected);
+    }
+  });
+
+  return dropUndefined({
+    id: spanId,
+    trace_id: stringValue(record.trace_id),
+    span_id: spanId,
+    // An explicit null parent is passed on as null instead of being read as a string.
+    parent_span_id: record.parent_span_id === null ? null : stringValue(record.parent_span_id),
+    name: stringValue(record.name) ?? spanId,
+    kind: stringValue(record.kind),
+    status: spanStatusFromValue(record.status),
+    start_time_unix_nano: startNanos,
+    end_time_unix_nano: endNanos,
+    start_time: unixNanoToIso(startNanos),
+    end_time: unixNanoToIso(endNanos),
+    duration_ms: durationMsFromNanos(startNanos, endNanos),
+    token_usage: tokenUsageFromAttributes(attributes),
+    attributes: sanitizeAttributeMap(attributes),
+    events: events.length > 0 ? events : undefined,
+  });
+}
+
+/**
+ * Projects the envelope's raw score list into `TraceSessionScore` entries.
+ *
+ * Entries that are not records, have no usable `name`, or have no finite
+ * numeric `score` are skipped.
+ *
+ * @returns The projected scores, or `undefined` when none qualify.
+ */
+function projectScores(scores: unknown): TraceSessionScore[] | undefined {
+  const projected = asArray(scores).flatMap((rawScore): TraceSessionScore[] => {
+    const record = asRecord(rawScore);
+    const name = stringValue(record?.name);
+    const value = finiteNumber(record?.score);
+    if (!record || !name || value === undefined) {
+      return [];
+    }
+    const entry = dropUndefined({
+      name,
+      type: stringValue(record.type),
+      score: value,
+      weight: finiteNumber(record.weight),
+      verdict: stringValue(record.verdict),
+      source: stringValue(record.source),
+      evaluated_at: stringValue(record.evaluated_at),
+      target_span_id: stringValue(record.target_span_id),
+      evidence: asRecord(record.evidence),
+    }) as TraceSessionScore;
+    return [entry];
+  });
+
+  return projected.length > 0 ? projected : undefined;
+}
+
+// Fields that make up external trace metadata. `sanitizeExternalTrace` only
+// returns a result when at least one of them is defined after sanitizing.
+const EXTERNAL_TRACE_KEYS = ['provider', 'project', 'session_id', 'trace_id', 'url'] as const;
+
+/**
+ * Reports whether a metadata key looks secret-bearing.
+ *
+ * The match is a case-insensitive substring match, so plural and compound
+ * keys such as `secrets` or `clientSecret` are caught as well.
+ *
+ * @param key Metadata key to inspect.
+ * @returns `true` when the key contains a secret-like word.
+ */
+function isSecretLikeKey(key: string): boolean {
+  // `private[._-]?key` and the dotted `api.key` form are included so this
+  // filter redacts everything the attribute-level credential filter treats as
+  // a credential.
+  return /(api[._-]?key|authorization|bearer|password|secret|private[._-]?key|token)/i.test(key);
+}
+
+/**
+ * Reduces an external URL to a form that is safe to display.
+ *
+ * Only `http:` and `https:` URLs without embedded credentials are accepted;
+ * the query string and fragment are stripped from the result.
+ *
+ * @returns The cleaned URL, or `undefined` when the value is missing,
+ *   unparsable, uses another scheme, or carries a username or password.
+ */
+function sanitizeUrl(value: unknown): string | undefined {
+  const raw = stringValue(value);
+  if (!raw) {
+    return undefined;
+  }
+
+  let parsed: URL;
+  try {
+    parsed = new URL(raw);
+  } catch {
+    return undefined;
+  }
+
+  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
+  const hasCredentials = parsed.username !== '' || parsed.password !== '';
+  if (!isHttp || hasCredentials) {
+    return undefined;
+  }
+
+  parsed.search = '';
+  parsed.hash = '';
+  return parsed.toString();
+}
+
+/**
+ * Builds `ExternalTraceMetadata` from a raw value, keeping only the known
+ * string fields and a sanitized `url`.
+ *
+ * @returns The metadata, or `undefined` when `value` is not a record or none
+ *   of the known fields is defined.
+ */
+function sanitizeExternalTrace(value: unknown): ExternalTraceMetadata | undefined {
+  const record = asRecord(value);
+  if (!record) {
+    return undefined;
+  }
+
+  const candidate = {
+    provider: stringValue(record.provider),
+    project: stringValue(record.project),
+    session_id: stringValue(record.session_id),
+    trace_id: stringValue(record.trace_id),
+    url: sanitizeUrl(record.url),
+  };
+  const sanitized = compactRecord(candidate) as ExternalTraceMetadata | undefined;
+  if (!sanitized) {
+    return undefined;
+  }
+
+  const hasKnownField = EXTERNAL_TRACE_KEYS.some((key) => sanitized[key] !== undefined);
+  return hasKnownField ? sanitized : undefined;
+}
+
+/**
+ * Assembles external trace metadata from flattened keys.
+ *
+ * Each field is read from `external_trace_<field>` first and from
+ * `external_trace.<field>` as a fallback, then run through
+ * `sanitizeExternalTrace`.
+ */
+function externalTraceFromFlatMetadata(
+  metadata: Record<string, unknown> | undefined,
+): ExternalTraceMetadata | undefined {
+  if (!metadata) {
+    return undefined;
+  }
+
+  const pick = (field: string): unknown =>
+    metadata[`external_trace_${field}`] ?? metadata[`external_trace.${field}`];
+
+  return sanitizeExternalTrace({
+    provider: pick('provider'),
+    project: pick('project'),
+    session_id: pick('session_id'),
+    trace_id: pick('trace_id'),
+    url: pick('url'),
+  });
+}
+
+/**
+ * Returns a copy of a metadata map with external-trace keys and secret-like
+ * keys removed at every nesting level.
+ *
+ * Values accepted by `isRecord` are sanitized recursively and dropped when
+ * nothing inside them survives; all other values are copied through untouched.
+ *
+ * @returns The filtered map, or `undefined` when the input is missing or no
+ *   entry survives.
+ */
+function sanitizeMetadata(
+  value: Record<string, unknown> | undefined,
+): Record<string, unknown> | undefined {
+  if (!value) {
+    return undefined;
+  }
+
+  const kept: Array<readonly [string, unknown]> = [];
+  for (const [key, entry] of Object.entries(value)) {
+    if (isExternalTraceKey(key) || isSecretLikeKey(key)) {
+      continue;
+    }
+    if (!isRecord(entry)) {
+      kept.push([key, entry]);
+      continue;
+    }
+    const nested = sanitizeMetadata(entry);
+    if (nested) {
+      kept.push([key, nested]);
+    }
+  }
+
+  // Object.fromEntries keeps the original own-property semantics for every key.
+  return kept.length > 0 ? Object.fromEntries(kept) : undefined;
+}
+
+/**
+ * Builds the `source` section of a trace session from the envelope's source
+ * record and an optional artifact path.
+ *
+ * Source metadata is passed through `sanitizeMetadata`. Returns `undefined`
+ * when neither a source record nor an artifact path is available.
+ */
+function sourceFromEnvelope(
+  source: Record<string, unknown> | undefined,
+  artifactPath: string | undefined,
+): TraceSessionSource | undefined {
+  if (!(source || artifactPath)) {
+    return undefined;
+  }
+
+  const projected = {
+    kind: stringValue(source?.kind),
+    path: stringValue(source?.path),
+    provider: stringValue(source?.provider),
+    format: stringValue(source?.format),
+    version: stringValue(source?.version),
+    artifact_path: artifactPath,
+    metadata: sanitizeMetadata(asRecord(source?.metadata)),
+  };
+  return compactRecord(projected) as TraceSessionSource | undefined;
+}
+
+/**
+ * Resolves external trace metadata for an envelope.
+ *
+ * Sources are tried in order and the first one that yields metadata wins:
+ * 1. `envelope.external_trace`
+ * 2. `envelope.source.metadata.external_trace`
+ * 3. flattened `external_trace_*` / `external_trace.*` keys in the source metadata
+ * 4. the same flattened keys in the root span's attributes
+ *
+ * @param envelope Trace envelope record.
+ * @returns Sanitized external trace metadata, or `undefined` when no source has any.
+ */
+function externalTraceFromEnvelope(
+  envelope: Record<string, unknown>,
+): ExternalTraceMetadata | undefined {
+  const source = asRecord(envelope.source);
+  const sourceMetadata = asRecord(source?.metadata);
+  const trace = asRecord(envelope.trace);
+  const rootSpanId = stringValue(trace?.root_span_id);
+  // Only look for the root span when the envelope names one. Without this
+  // guard an absent root_span_id compares equal to the missing span_id of any
+  // id-less span, and that span's attributes would be read as the root's.
+  const rootSpan =
+    rootSpanId === undefined
+      ? undefined
+      : asArray(trace?.spans)
+          .map((span) => asRecord(span))
+          .find((span) => stringValue(span?.span_id) === rootSpanId);
+  const rootAttributes = asRecord(rootSpan?.attributes);
+
+  return (
+    sanitizeExternalTrace(envelope.external_trace) ??
+    sanitizeExternalTrace(sourceMetadata?.external_trace) ??
+    externalTraceFromFlatMetadata(sourceMetadata) ??
+    externalTraceFromFlatMetadata(rootAttributes)
+  );
+}
+
+/**
+ * Projects a trace envelope into the trace session response shape.
+ *
+ * A non-record input is treated as an empty envelope. Spans that cannot be
+ * projected are dropped, and `events` is the concatenation of every projected
+ * span's events in span order.
+ *
+ * @param input Raw envelope value.
+ * @param options `runId` overrides the envelope's `eval.run_id`;
+ *   `artifactPath` is recorded on the `source` section.
+ * @returns The response, passed through `dropUndefined`.
+ */
+export function traceEnvelopeToTraceSessionResponse(
+  input: unknown,
+  options: TraceSessionProjectionOptions = {},
+): TraceSessionResponse {
+  const envelope = asRecord(input) ?? {};
+  const evaluation = asRecord(envelope.eval);
+  const trace = asRecord(envelope.trace);
+
+  const spans: TraceSessionSpan[] = [];
+  asArray(trace?.spans).forEach((rawSpan, index) => {
+    const projected = projectSpan(rawSpan, index);
+    if (projected !== undefined) {
+      spans.push(projected);
+    }
+  });
+
+  const events: TraceSessionEvent[] = [];
+  for (const span of spans) {
+    events.push(...(span.events ?? []));
+  }
+
+  return dropUndefined({
+    schema_version: TRACE_SESSION_SCHEMA_VERSION,
+    artifact_id: stringValue(envelope.artifact_id),
+    created_at: stringValue(envelope.created_at),
+    run_id: options.runId ?? stringValue(evaluation?.run_id),
+    test_id: stringValue(evaluation?.test_id),
+    suite: stringValue(evaluation?.suite),
+    target: stringValue(evaluation?.target),
+    trace_id: stringValue(trace?.trace_id),
+    root_span_id: stringValue(trace?.root_span_id),
+    source: sourceFromEnvelope(asRecord(envelope.source), options.artifactPath),
+    external_trace: externalTraceFromEnvelope(envelope),
+    spans,
+    events,
+    scores: projectScores(envelope.scores),
+  });
+}
+
+/**
+ * Arranges a flat span list into a forest of `TraceSpanNode`s.
+ *
+ * Malformed input is preserved rather than rejected. Every span becomes
+ * exactly one node, and each structural problem is recorded as a diagnostic
+ * on the affected node:
+ * - `missing_span_id`: the span had no usable `span_id`.
+ * - `duplicate_span_id`: a later span reused an earlier span's id.
+ * - `self_parent`, `missing_parent`, `cycle`: the parent link cannot be
+ *   honoured, so the node is promoted to a root.
+ *
+ * @param spans Spans to arrange; the array itself is not modified.
+ * @returns Root nodes, ordered (together with all descendants) by
+ *   `sortTraceSpanNodes`.
+ */
+export function buildTraceSpanTree(spans: readonly TraceSessionSpan[]): TraceSpanNode[] {
+  const nodes: TraceSpanNode[] = [];
+  const firstNodeBySpanId = new Map<string, TraceSpanNode>();
+  const spanIdCounts = new Map<string, number>();
+
+  // Pass 1: create one node per span and assign collision-free node ids.
+  spans.forEach((span, index) => {
+    const rawSpanId = stringValue(span.span_id);
+    const spanId = rawSpanId ?? `missing-span-${index}`;
+    const occurrence = (spanIdCounts.get(spanId) ?? 0) + 1;
+    spanIdCounts.set(spanId, occurrence);
+
+    const node: TraceSpanNode = {
+      // The first occurrence keeps the plain span id; repeats get `#<n>` appended.
+      id: occurrence === 1 ? spanId : `${spanId}#${occurrence}`,
+      spanId,
+      parentSpanId: span.parent_span_id,
+      span,
+      children: [],
+      diagnostics: rawSpanId
+        ? undefined
+        : [
+            {
+              code: 'missing_span_id',
+              message: 'Span was missing span_id and was assigned a stable node id.',
+              node_id: spanId,
+            },
+          ],
+    };
+
+    if (occurrence > 1) {
+      addNodeDiagnostic(node, {
+        code: 'duplicate_span_id',
+        message: 'Duplicate span_id was preserved with a collision-free node id.',
+        span_id: spanId,
+        node_id: node.id,
+      });
+    }
+    // Parent lookups resolve to the first node seen for a span id.
+    if (!firstNodeBySpanId.has(spanId)) {
+      firstNodeBySpanId.set(spanId, node);
+    }
+    nodes.push(node);
+  });
+
+  // Pass 2: resolve each node's parent; unresolvable links leave the node parentless.
+  const parentByNodeId = new Map<string, TraceSpanNode>();
+  for (const node of nodes) {
+    // Only a non-empty string counts as a parent reference.
+    const parentSpanId =
+      typeof node.parentSpanId === 'string' && node.parentSpanId.length > 0
+        ? node.parentSpanId
+        : undefined;
+    if (!parentSpanId) {
+      continue;
+    }
+    if (parentSpanId === node.spanId) {
+      addNodeDiagnostic(node, {
+        code: 'self_parent',
+        message: 'Span parent_span_id points to itself; span was promoted to a root.',
+        span_id: node.spanId,
+        node_id: node.id,
+        parent_span_id: parentSpanId,
+      });
+      continue;
+    }
+    const parent = firstNodeBySpanId.get(parentSpanId);
+    if (!parent) {
+      addNodeDiagnostic(node, {
+        code: 'missing_parent',
+        message: 'Span parent_span_id was not present in this trace; span was promoted to a root.',
+        span_id: node.spanId,
+        node_id: node.id,
+        parent_span_id: parentSpanId,
+      });
+      continue;
+    }
+    parentByNodeId.set(node.id, parent);
+  }
+
+  // Pass 3: collect every node whose ancestor chain loops before detaching any
+  // of them, so detection always runs against the unmodified parent map.
+  const cyclicNodes: TraceSpanNode[] = [];
+  for (const node of nodes) {
+    if (hasAncestorCycle(node, parentByNodeId)) {
+      cyclicNodes.push(node);
+    }
+  }
+  for (const node of cyclicNodes) {
+    parentByNodeId.delete(node.id);
+    addNodeDiagnostic(node, {
+      code: 'cycle',
+      message: 'Span parent chain contains a cycle; span was promoted to a root.',
+      span_id: node.spanId,
+      node_id: node.id,
+      parent_span_id: typeof node.parentSpanId === 'string' ? node.parentSpanId : undefined,
+    });
+  }
+
+  // Pass 4: attach each node to its resolved parent, or treat it as a root.
+  const roots: TraceSpanNode[] = [];
+  for (const node of nodes) {
+    const parent = parentByNodeId.get(node.id);
+    if (parent) {
+      parent.children.push(node);
+    } else {
+      roots.push(node);
+    }
+  }
+
+  sortTraceSpanNodes(roots);
+  return roots;
+}
+
+/**
+ * Appends a diagnostic to a node by assigning a new array, so any previously
+ * referenced diagnostics array is left unmodified.
+ */
+function addNodeDiagnostic(node: TraceSpanNode, diagnostic: TraceSpanTreeDiagnostic): void {
+  const existing = node.diagnostics ?? [];
+  node.diagnostics = existing.concat([diagnostic]);
+}
+
+/**
+ * Walks a node's ancestor chain and reports whether it loops.
+ *
+ * Returns `true` when the walk comes back to `node` itself or revisits any
+ * ancestor, so a node hanging off a cycle is reported as well as the nodes
+ * inside the cycle.
+ */
+function hasAncestorCycle(
+  node: TraceSpanNode,
+  parentByNodeId: ReadonlyMap<string, TraceSpanNode>,
+): boolean {
+  const visited = new Set<string>();
+  for (
+    let ancestor = parentByNodeId.get(node.id);
+    ancestor !== undefined;
+    ancestor = parentByNodeId.get(ancestor.id)
+  ) {
+    if (ancestor.id === node.id || visited.has(ancestor.id)) {
+      return true;
+    }
+    visited.add(ancestor.id);
+  }
+  return false;
+}
+
+/**
+ * Orders two unix-nanosecond timestamps given as strings.
+ *
+ * Identical inputs compare equal and a missing or empty value sorts last.
+ * Values are compared as `BigInt`s; if either one cannot be parsed, the
+ * comparison falls back to `localeCompare` on the raw strings.
+ */
+function compareUnixNanoValue(first: string | undefined, second: string | undefined): number {
+  if (first === second) {
+    return 0;
+  }
+  if (!first) {
+    return 1;
+  }
+  if (!second) {
+    return -1;
+  }
+
+  let firstNanos: bigint;
+  let secondNanos: bigint;
+  try {
+    firstNanos = BigInt(first);
+    secondNanos = BigInt(second);
+  } catch {
+    return first.localeCompare(second);
+  }
+
+  if (firstNanos === secondNanos) {
+    return 0;
+  }
+  return firstNanos < secondNanos ? -1 : 1;
+}
+
+/**
+ * Sort comparator for span nodes.
+ *
+ * Nodes are ordered by start time first. When start times tie, a node sorts
+ * ahead of a node that names it as parent; remaining ties are broken by span
+ * id and then by node id.
+ */
+function compareTraceSpanNodes(first: TraceSpanNode, second: TraceSpanNode): number {
+  const startOrder = compareUnixNanoValue(
+    first.span.start_time_unix_nano,
+    second.span.start_time_unix_nano,
+  );
+  if (startOrder !== 0) {
+    return startOrder;
+  }
+
+  const firstIsParentOfSecond = first.spanId === second.parentSpanId;
+  if (firstIsParentOfSecond) {
+    return -1;
+  }
+  const secondIsParentOfFirst = second.spanId === first.parentSpanId;
+  if (secondIsParentOfFirst) {
+    return 1;
+  }
+
+  // localeCompare yields 0 on a tie, so `||` moves on to the node-id tiebreak.
+  return first.spanId.localeCompare(second.spanId) || first.id.localeCompare(second.id);
+}
+
+/**
+ * Sorts `nodes` and every descendant child list in place with
+ * `compareTraceSpanNodes`, and passes each visited node's `diagnostics`
+ * through `nonEmptyArray`.
+ *
+ * @param nodes Sibling nodes to order; the array and the nodes are mutated.
+ */
+function sortTraceSpanNodes(nodes: TraceSpanNode[]): void {
+  nodes.sort(compareTraceSpanNodes);
+  for (const node of nodes) {
+    // The recursive call sorts the child list itself, so sorting it here too
+    // would repeat the same work at every level of the tree.
+    if (node.children.length > 0) {
+      sortTraceSpanNodes(node.children);
+    }
+    node.diagnostics = nonEmptyArray(node.diagnostics) as TraceSpanTreeDiagnostic[] | undefined;
+  }
+}
diff --git a/apps/dashboard/src/lib/types.ts b/apps/dashboard/src/lib/types.ts
index 087836a37..a3d7d20b3 100644
--- a/apps/dashboard/src/lib/types.ts
+++ b/apps/dashboard/src/lib/types.ts
@@ -35,6 +35,10 @@ export interface RunMeta {
   pending_tags?: string[];
   /** True when local editable metadata differs from the fetched remote metadata. */
   metadata_dirty?: boolean;
+  /** Materialized final run state consumed by readers instead of folding raw operations. */
+  final_state?: RunFinalState;
+  /** Operation-log watermark for the materialized final state. */
+  oplog_watermark?: RunOplogWatermark;
   /**
    * Live execution status. Only present for Dashboard-launched runs that are
    * still being tracked in-memory — used to render a spinner in RunList
@@ -44,6 +48,17 @@ export interface RunMeta {
   status?: 'starting' | 'running' | 'finished' | 'failed';
 }
 
+/**
+ * Operation-log watermark attached to a run's materialized final state.
+ * Only `ref` is required.
+ */
+export interface RunOplogWatermark {
+  ref: string;
+  operation_id?: string;
+  updated_at?: string;
+}
+
+/**
+ * Materialized final state of a run: its lifecycle status and its tags.
+ */
+export interface RunFinalState {
+  lifecycle: 'active' | 'hidden' | 'deleted';
+  tags: string[];
+}
+
 export interface RunListResponse {
   runs: RunMeta[];
   next_cursor?: string;
@@ -120,6 +135,103 @@ export interface SourceTraceability {
   referenced_files?: SourceReferencedFile[];
 }
 
+/**
+ * Optional external viewer reference only. AgentV run artifacts remain the
+ * canonical source of truth for Dashboard trace/session details.
+ *
+ * Every field is optional.
+ */
+export interface ExternalTraceMetadata {
+  provider?: string;
+  project?: string;
+  session_id?: string;
+  trace_id?: string;
+  url?: string;
+}
+
+/**
+ * Token counts reported for a span, broken down by category.
+ * Every field is optional.
+ */
+export interface TraceSessionTokenUsage {
+  input?: number;
+  output?: number;
+  reasoning?: number;
+  cached?: number;
+  total?: number;
+}
+
+/**
+ * Status of a span. `code` is carried as a string.
+ */
+export interface TraceSessionSpanStatus {
+  code?: string;
+  message?: string;
+}
+
+/** Classification of a span event; `event` is the generic kind. */
+export type TraceSessionEventKind = 'annotation' | 'exception' | 'event' | 'score';
+
+/**
+ * A single event recorded on a span.
+ *
+ * `span_id` names the owning span. `score`, `text`, and `passed` are optional
+ * values carried alongside the event's `attributes`.
+ */
+export interface TraceSessionEvent {
+  event_id: string;
+  span_id: string;
+  name: string;
+  kind: TraceSessionEventKind;
+  /** Event time as a unix-nanosecond string. */
+  time_unix_nano?: string;
+  timestamp?: string;
+  score?: number;
+  text?: string;
+  passed?: boolean;
+  attributes?: Record<string, unknown>;
+}
+
+/**
+ * A span within a trace session, including its timing, token usage,
+ * attributes, and events.
+ */
+export interface TraceSessionSpan {
+  id: string;
+  trace_id?: string;
+  span_id: string;
+  /** Parent span id; may be absent or explicitly `null`. */
+  parent_span_id?: string | null;
+  name: string;
+  kind?: string;
+  status?: TraceSessionSpanStatus;
+  /** Start and end times as unix-nanosecond strings. */
+  start_time_unix_nano?: string;
+  end_time_unix_nano?: string;
+  start_time?: string;
+  end_time?: string;
+  duration_ms?: number;
+  token_usage?: TraceSessionTokenUsage;
+  attributes?: Record<string, unknown>;
+  events?: TraceSessionEvent[];
+}
+
+/**
+ * A score attached to a trace session. `name` and `score` are required; the
+ * remaining fields are optional provenance details.
+ */
+export interface TraceSessionScore {
+  name: string;
+  type?: string;
+  score: number;
+  weight?: number;
+  verdict?: string;
+  source?: string;
+  evaluated_at?: string;
+  /** Id of the span the score refers to, when it targets one. */
+  target_span_id?: string;
+  evidence?: Record<string, unknown>;
+}
+
+/**
+ * Describes where a trace session came from. Every field is optional.
+ */
+export interface TraceSessionSource {
+  kind?: string;
+  path?: string;
+  provider?: string;
+  format?: string;
+  version?: string;
+  artifact_path?: string;
+  metadata?: Record<string, unknown>;
+}
+
+/**
+ * Trace/session payload consumed by the Dashboard.
+ *
+ * `schema_version`, `spans`, and `events` are always present; everything else
+ * is optional.
+ */
+export interface TraceSessionResponse {
+  /** Fixed schema identifier for this payload shape. */
+  schema_version: 'agentv.dashboard.trace_session.v1';
+  artifact_id?: string;
+  created_at?: string;
+  run_id?: string;
+  test_id?: string;
+  suite?: string;
+  target?: string;
+  trace_id?: string;
+  root_span_id?: string;
+  source?: TraceSessionSource;
+  external_trace?: ExternalTraceMetadata;
+  spans: TraceSessionSpan[];
+  events: TraceSessionEvent[];
+  scores?: TraceSessionScore[];
+}
+
 export interface EvalResult {
   testId: string;
   timestamp?: string;
@@ -149,6 +261,8 @@ export interface RunDetailResponse {
   results: EvalResult[];
   source: 'local' | 'remote';
   source_label?: string;
+  final_state?: RunFinalState;
+  oplog_watermark?: RunOplogWatermark;
   /** Live execution status when this run is still tracked in-memory by Dashboard. */
   status?: 'starting' | 'running' | 'finished' | 'failed';
   /** Path to the run workspace directory (relative to cwd when inside, otherwise absolute). Local runs only. */
@@ -176,6 +290,19 @@ export interface EvalDetailResponse {
   eval: EvalResult;
 }
 
+/** Outcome of loading a transcript artifact; `ok` is the only success state. */
+export type TranscriptArtifactStatus = 'ok' | 'missing' | 'dangling' | 'unsupported';
+
+/**
+ * Response for a transcript artifact request. Only `status` is required.
+ * NOTE(review): presumably `content` is populated only when `status` is `ok`
+ * and `message` explains the other states — confirm against the API handler.
+ */
+export interface TranscriptArtifactResponse {
+  status: TranscriptArtifactStatus;
+  transcript_path?: string;
+  answer_path?: string;
+  answer_content?: string;
+  content?: string;
+  language?: string;
+  message?: string;
+  pointer?: string;
+}
+
 export interface IndexEntry {
   run_filename: string;
   display_name?: string;
@@ -260,6 +387,8 @@ export interface CompareRunEntry {
   remote_tags?: string[];
   pending_tags?: string[];
   metadata_dirty?: boolean;
+  final_state?: RunFinalState;
+  oplog_watermark?: RunOplogWatermark;
   source: 'local' | 'remote';
   eval_count: number;
   quality_count?: number;
@@ -283,6 +412,8 @@ export interface RunTagsResponse {
   remote_tags?: string[];
   pending_tags?: string[];
   metadata_dirty?: boolean;
+  final_state?: RunFinalState;
+  oplog_watermark?: RunOplogWatermark;
   updated_at: string;
 }
 
diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
index e3fc7803e..ae18284b7 100644
--- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -439,22 +439,37 @@ Each result row's `artifact_dir` can include both `outputs/trace.json` and
 artifact; use each index row's `transcript_path` to find the per-result
 transcript.
 
+Rows also include `artifact_pointers` for AgentV-owned artifact storage. Pointer
+entries such as `artifact_pointers.trace` and `artifact_pointers.transcript`
+carry the storage `ref`, artifact `key`, canonical run-relative `path`,
+`object_version`, `sha256`, `size`, `schema_version`, and `media_type` so
+viewers and exports can migrate from git refs to object storage without changing
+the run record contract.
+
 `outputs/trace.json` is the full-fidelity `agentv.trace.v1` sidecar.
 It stores the canonical span graph, source metadata, capture/redaction policy,
 conversion warnings, score provenance, and opaque evidence references.
 
-`outputs/transcript.jsonl` is a derived compatibility artifact for reading and
-replay. It uses provider-neutral `agentv.transcript.v1` rows with stable
-top-level fields for message order, role/content, tool calls and paired results,
-timing, token usage, cost, source metadata, capture state, and trace pointers.
+`outputs/transcript.jsonl` is the canonical AgentV transcript/timeline artifact.
+It uses provider-neutral `agentv.transcript.v1` rows with stable top-level fields
+for message order, role/content, tool calls and paired results, timing, token
+usage, cost, source metadata, capture state, and trace pointers.
 Provider-native payloads can appear only inside opaque nested fields such as
 `metadata`, `source.metadata`, tool `input`, or tool `output`.
 
+When an agent provider captures a native stream or session log, the result row
+may also include `raw_provider_log_path`, pointing at
+`outputs/raw/provider.log`. That file is raw evidence copied byte-for-byte from
+the provider log and is not parsed, normalized, or required for replay, import,
+Agent Skills conversion, or grading. AgentV does not write or maintain a
+parallel `outputs/transcript.json` source of truth.
+
 Use the transcript when you need a compact portable message/event projection
 over the trace, including exports to role/content arrays for chat-template or
 Hugging Face-style workflows. Use the trace when you need full lifecycle, span,
-raw evidence, redaction, or adapter conversion details. The transcript is not a
-second canonical trace source and is not a provider-native Pi session dump.
+raw evidence pointers, redaction, or adapter conversion details. The transcript
+is not a second canonical trace source and is not a provider-native Pi session
+dump.
 Older transcript rows without `schema_version`, `capture`, or `trace` remain
 accepted for replay.
 
diff --git a/apps/web/src/content/docs/docs/tools/dashboard.mdx b/apps/web/src/content/docs/docs/tools/dashboard.mdx
index fadec9963..fd5f9eced 100644
--- a/apps/web/src/content/docs/docs/tools/dashboard.mdx
+++ b/apps/web/src/content/docs/docs/tools/dashboard.mdx
@@ -146,7 +146,7 @@ Select 2+ rows with the checkboxes and click the sticky **Compare N** action to
 
 ### Retroactive tags
 
-Click any row's **Tags** cell to tag a run after the fact. Each run can carry multiple free-form tags (max 20, up to 60 characters each); local tags are stored in a `tags.json` sidecar next to `index.jsonl` in the run workspace, so they're mutable, non-destructive, and won't touch your eval YAML or run manifest. The chip editor supports Enter/comma to commit a new tag, Backspace to remove the last chip, and **Clear all** to remove every tag (deletes the sidecar).
+Click any row's **Tags** cell to tag a run after the fact. Each run can carry multiple free-form tags (max 20, up to 60 characters each); local tags are stored in a `tags.json` sidecar next to `index.jsonl` in the run workspace, so they're mutable, non-destructive, and won't touch your eval YAML or run manifest. The chip editor supports Enter/comma to commit a new tag, Backspace to remove the last chip, and **Clear all** to record an empty tag state with an operation watermark.
 
 Remote run payloads stay immutable, but their tags are editable. Dashboard writes remote tag changes as metadata overlays under `.agentv/results/metadata/runs/.../tags.json` in the configured results repo clone. Until those overlays are synced, the run and project show a dirty state; **Sync Project** commits and pushes them when it is safe to do so.
 
diff --git a/apps/web/src/content/docs/docs/tools/import.mdx b/apps/web/src/content/docs/docs/tools/import.mdx
index 54794e6ec..86ff592aa 100644
--- a/apps/web/src/content/docs/docs/tools/import.mdx
+++ b/apps/web/src/content/docs/docs/tools/import.mdx
@@ -176,7 +176,10 @@ Rows without `schema_version`, `capture`, or `trace` from older AgentV transcrip
 exports remain replayable. New eval run artifacts write the v1 shape.
 For eval run artifacts, `outputs/transcript.jsonl` is derived from
 `outputs/trace.json`; it is a portable message/event projection, not a second
-canonical trace source or a provider-native session dump.
+canonical trace source or a provider-native session dump. Provider-native
+session or stream logs, when captured during an eval run, are separate raw
+evidence artifacts referenced by `raw_provider_log_path`; Agent Skills import,
+convert, transpile, and run paths do not require them.
 
 ## What Gets Parsed
 
diff --git a/apps/web/src/content/docs/docs/tools/results.mdx b/apps/web/src/content/docs/docs/tools/results.mdx
index 4837595aa..add4afdfc 100644
--- a/apps/web/src/content/docs/docs/tools/results.mdx
+++ b/apps/web/src/content/docs/docs/tools/results.mdx
@@ -108,6 +108,50 @@ Duplicate policy is explicit:
 
 `attempt` defaults to `0`, `variant` defaults to `null`, and `source_target` defaults to `target` when a run has no replay source. Replay and rerun sources can set `source_target`, `attempt`, or `variant`; those values are part of the identity, so different attempts, variants, or source targets produce distinct projection IDs.
 
+### Vendor-neutral projection bundle
+
+Add `--projection-bundle` to an export when an external adapter needs a
+backend-neutral handoff instead of AgentV's full artifact tree:
+
+```bash
+agentv results export <run-workspace-or-index.jsonl> --projection-bundle
+```
+
+This writes `projection_bundle.json` next to the exported artifacts. The bundle
+contains stable projection IDs, trace envelope metadata, OpenInference-shaped
+span references, score provenance, artifact-relative paths, capture/redaction
+summary, and conversion warnings. It does not call Phoenix, Opik, Braintrust,
+Langfuse, Hugging Face, or any other live service.
+
+For adapter development and CI snapshots, use dry-run mode:
+
+```bash
+agentv results export <run-workspace-or-index.jsonl> --dry-run > projection_bundle.json
+```
+
+Dry-run prints deterministic JSON and does not write export artifacts. Vendor
+adapters should consume either this JSON directly or the local
+`projection_bundle.json`. Dry-run refs are marked
+`artifact_refs.status: "planned_export"` because the export tree has not been
+written. Bundles written with `--projection-bundle` are built from the emitted
+export `index.jsonl` and use `artifact_refs.status: "emitted"`.
+
+Raw prompt text, final output, and tool arguments/results are excluded by
+default, and raw-bearing artifact refs such as `grading_path`, `input_path`,
+`answer_path`, `response_path`, `transcript_path`, and `trace_path` are omitted from
+metadata-only bundles. To include raw payloads and raw-bearing refs in the
+bundle, opt in explicitly:
+
+```bash
+agentv results export <run-workspace-or-index.jsonl> --dry-run --include-raw-content
+```
+
+Keep backend-specific anonymization in the adapter layer. For example, an Opik
+adapter can read the metadata-only bundle by default, or require
+`--include-raw-content` and then run Opik anonymizers before upload. AgentV does
+not run a custom redaction engine in `results export`; it records the capture
+policy so downstream processing is auditable.
+
 ## Inspection helpers
 
 For lightweight terminal workflows:
@@ -127,7 +171,7 @@ The CLI contract is deliberately narrow: `agentv results` manages local result a
 
 Use these supported remote workflows instead:
 
-- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `repo_path: .` with `branch: agentv/results/v1` to store results on a dedicated branch of the source repo. `repo_path` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `sync.auto_push: true` to push after publish, or `sync.require_push: true` in CI to fail when that push fails. While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled.
+- **Automatic publishing:** configure `projects[].results` or top-level `results`; new `agentv eval` and `agentv pipeline bench` runs publish completed artifacts after the run completes. Use `repo_path: .` with `branch: agentv/results/v1` to store primary result records on a dedicated branch of the source repo. AgentV reserves `agentv/results/v1` for primary results, `agentv/artifacts/v1` for heavy artifact payloads, and `agentv/oplog/v1` for mutable run/result operations. `repo_path` means an existing local Git checkout, distinct from `workspace.repos[].repo`, which is a portable repository identity. Set `sync.auto_push: true` to push after publish, or `sync.require_push: true` in CI to fail when that push fails. While an eval is still running, [WIP checkpoints](/docs/tools/wip-checkpoints/) can keep partial run output durable on `agentv/wip/...` branches when auto-push is enabled.
 - **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**.
 - **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`.
 - **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again.
diff --git a/docs/dogfood-reports/2026-06-21-dogfood-integration-av-vwa-16-10-dogfood.md b/docs/dogfood-reports/2026-06-21-dogfood-integration-av-vwa-16-10-dogfood.md
new file mode 100644
index 000000000..04b66ff1a
--- /dev/null
+++ b/docs/dogfood-reports/2026-06-21-dogfood-integration-av-vwa-16-10-dogfood.md
@@ -0,0 +1,175 @@
+# Dogfood Report - dogfood-integration-av-vwa-16-10
+
+> Diff-scoped CLI and Dashboard QA of `dogfood-integration-av-vwa-16-10` vs `origin/main`. Generated by `/ce-dogfood-beta` on 2026-06-21.
+
+## Diff Summary
+
+- Adds canonical AgentV artifact refs and pointer shapes for result rows, trace sidecars, transcript projections, raw provider logs, and projection bundles.
+- Writes provider-neutral transcript artifacts at `outputs/transcript.jsonl` while keeping raw provider logs separate at `outputs/raw/provider.log`.
+- Adds results combine/export/projection behavior that preserves or rewrites artifact pointers and keeps default exports metadata-oriented.
+- Adds oplog-shaped run tag state, tag clear tombstones, and Dashboard/API fields for final run state and watermarks.
+- Updates Dashboard API and UI so run lists/details stay metadata-oriented while the Transcript tab lazily loads canonical transcript content and handles missing/dangling/unsupported states.
+- Adds a Dashboard trace read model that preserves problematic span graphs with diagnostics and sanitizes external trace or credential-like attributes.
+
+## Personas
+
+Source: `STRATEGY.md` "Who it's for".
+
+- **AI platform engineers and agent builders** - evaluate real agent workflows, compare targets, gate changes, and inspect portable run artifacts from the same workspace their teams already use.
+
+## Flows Tested
+
+### Flow A - Canonical Result Artifact Emission
+
+```mermaid
+flowchart TD
+    A[Eval result is written] --> B[Per-test artifact directory is created]
+    B --> C[Trace envelope written to outputs/trace.json]
+    C --> D{Trace has transcript rows?}
+    D -->|Yes| E[Canonical transcript JSONL written to outputs/transcript.jsonl]
+    D -->|No| F[Transcript path and pointer are omitted]
+    E --> G{Raw provider log present?}
+    F --> G
+    G -->|Yes| H[Raw log copied to outputs/raw/provider.log]
+    G -->|No| I[Index row excludes raw_provider_log_path]
+    H --> J[Index row records raw_provider_log_path separately]
+    I --> K[Index row emits artifact_pointers with agentv/artifacts/v1]
+    J --> K
+    K --> L[Consumers parse snake_case row and reject new camelCase artifactPointers]
+```
+
+### Flow B - Dashboard Metadata and Lazy Transcript Loading
+
+```mermaid
+flowchart TD
+    A[User opens Dashboard run list] --> B[API loads lightweight run metadata]
+    B --> C[User opens a run detail]
+    C --> D[API hydrates detail without transcript bodies]
+    D --> E[User selects an eval]
+    E --> F[Checks tab shows metadata and grader state]
+    F --> G[User opens Transcript tab]
+    G --> H{Canonical transcript pointer resolves?}
+    H -->|Yes| I[Transcript endpoint reads outputs/transcript.jsonl lazily]
+    H -->|Missing| J[No structured transcript state]
+    H -->|Dangling| K[Unavailable artifact state with path]
+    H -->|Unsupported| L[Unsupported pointer state with pointer details]
+    I --> M[Timeline renders transcript and raw/download links]
+```
+
+### Flow C - Combine Run Artifact Pointer Rewriting
+
+```mermaid
+flowchart TD
+    A[User selects two or more run workspaces] --> B[Combine reads each index.jsonl]
+    B --> C{Duplicate test_id and target rows?}
+    C -->|Error policy| D[Conflict is reported]
+    C -->|Latest or explicit choice| E[Selected rows are kept]
+    C -->|No duplicates| E
+    E --> F[Referenced artifacts are copied under sources/source-N]
+    F --> G[Trace artifact pointers are rewritten]
+    G --> H[Transcript artifact pointers are rewritten]
+    H --> I[Combined index.jsonl points only at copied files]
+```
+
+### Flow D - Tags and Oplog Watermarks
+
+```mermaid
+flowchart TD
+    A[Run metadata is read] --> B[Tag sidecar or remote state is materialized]
+    B --> C[User sets tags]
+    C --> D[run.tags.set operation watermark is written]
+    D --> E[Run list/detail exposes final_state and oplog_watermark]
+    E --> F[User clears tags]
+    F --> G[Empty tag tombstone is written]
+    G --> H[final_state.tags is empty and clear watermark is preserved]
+```
+
+### Flow E - Trace Read Model Hardening
+
+```mermaid
+flowchart TD
+    A[Dashboard reads trace envelope] --> B[Project spans, events, scores, and external trace metadata]
+    B --> C[Credential-like and unsafe external attributes are removed]
+    C --> D{Span graph shape}
+    D -->|Duplicate span ids| E[Preserve nodes with collision-free ids and diagnostics]
+    D -->|Missing parents| F[Promote span to diagnostic root]
+    D -->|Self-parent or cycle| G[Promote cyclic spans to diagnostic roots]
+    E --> H[Stable tree is rendered]
+    F --> H
+    G --> H
+```
+
+### Flow F - Projection Bundle Export
+
+```mermaid
+flowchart TD
+    A[User requests projection bundle or dry run] --> B[Completed run manifest is read]
+    B --> C{Raw content opted in?}
+    C -->|No default| D[Bundle records metadata-only capture policy]
+    D --> E[artifact_refs are planned_export and omit raw-bearing paths]
+    E --> F[Trace envelopes omit raw evidence and transcript metadata payloads]
+    C -->|Yes| G[Bundle includes raw content and emitted artifact refs]
+    G --> H[Adapters receive explicit full-content payload]
+```
+
+## Test Matrix & Results
+
+| # | Flow | Journey / Scenario | Status | Issue | Fix | Commit |
+|---|------|--------------------|--------|-------|-----|--------|
+| 1 | A | Artifact writer emits `outputs/transcript.jsonl`, canonical `artifact_pointers.transcript.ref=agentv/artifacts/v1`, and canonical trace pointer refs. | Pass | Verified by artifact-writer regression tests. | - | - |
+| 2 | A | Raw provider log is copied to `outputs/raw/provider.log`, remains separate from canonical transcript rows, and parsed result rows do not treat it as a fresh source log. | Pass | Verified by artifact-writer and orchestrator tests. | - | - |
+| 3 | A | New invalid camelCase `artifactPointers` rows are rejected while historical result-row aliases still normalize at the boundary. | Pass | Verified by parser/shared results tests. | - | - |
+| 4 | C | Combining runs copies pointed trace/transcript files and rewrites pointer paths/keys to `sources/source-N/...`. | Pass | Verified by combine tests. | - | - |
+| 5 | D | Local tag set and tag clear/tombstone operations preserve `final_state` and a fresh `oplog_watermark`. | Pass | Verified by tests and live API set/clear/readback against the fixture server. | - | - |
+| 6 | B | Run list, run detail, compare, and index API routes stay metadata-oriented and do not read transcript bodies. | Pass | Verified by serve tests and live API detail payload without transcript body content. | - | - |
+| 7 | B | Transcript endpoint returns lazy `ok`, `missing`, `dangling`, and `unsupported` pointer transcript states from canonical transcript pointers. | Pass | Verified by serve tests, live API calls, and browser Transcript tab states. | - | - |
+| 8 | E | Trace read model handles duplicate spans, missing parents, self-parent/cycles, and sanitizes external/credential-like attributes. | Pass | Verified by Dashboard trace read-model tests. | - | - |
+| 9 | F | Projection bundle dry-run/default export marks planned refs correctly and excludes raw-bearing payloads by default. | Fixed | Live dry run crashed when a hydrated grader score omitted `assertions`. | Added missing-array fallbacks in result index and trace envelope score serialization, plus regression coverage. | b25b0475 |
+| 10 | B | Browser UAT: Dashboard run list/detail remains usable, Transcript tab lazy-loads canonical content, and console errors are absent. | Pass | Agent-browser verified run list/detail, canonical/missing/dangling/unsupported Transcript tab states, lazy request logs, and no page errors. | - | - |
+
+## What Was Fixed
+
+### Projection bundle dry run crashed on grader scores without assertions - `b25b0475`
+
+- **Symptom:** `agentv results export <run> --projection-bundle --dry-run` crashed with `undefined is not an object (evaluating 'score.assertions.map')` when a hydrated grading artifact had a grader score without an `assertions` array.
+- **Root cause:** `packages/core/src/evaluation/run-artifacts.ts` and `packages/core/src/evaluation/trace-envelope.ts` assumed every `GraderResult` carried `assertions`, but historical or hand-authored grading artifacts can omit that optional array.
+- **Fix:** Normalize missing score assertions to an empty array in index-row score serialization and trace-envelope score evidence serialization.
+- **Regression test:** `apps/cli/test/commands/results/export.test.ts` now builds a projection bundle from a grader score that omits `assertions`.
+
+## Console Errors
+
+None observed through `agent-browser errors` after canonical, missing, dangling, and unsupported Transcript tab checks. `agent-browser console` was also empty on the canonical transcript path.
+
+Expected test-suite stderr included git fallback warnings for intentionally invalid remote fixtures; the suite passed.
+
+## Evidence
+
+- Diff analyzed with `git diff --name-only origin/main...HEAD` and focused code reads across result writing, combine/export, serve, Dashboard detail/API, and trace read model paths.
+- Built core with `bun --filter @agentv/core build`.
+- Built Dashboard with `bun run build` from `apps/dashboard/`; Vite emitted only the existing large-chunk warning.
+- Ran focused regression suite after the fix: `333 pass`, `0 fail`, `1372 expect() calls`, across 10 files.
+- Live Dashboard/results server started from source against a local fixture project on port 3217.
+- Live API checks covered run list/detail, transcript `ok`/`missing`/`dangling`/`unsupported`, tag set/clear/readback, and projection dry run.
+- Browser UAT used `agent-browser` with a local fixture project. Screenshots were captured outside the public repo as `transcript-tab.png` and `transcript-unsupported.png`.
+
+## Human Verifications
+
+Not applicable. The proof used local fixtures and CLI/Dashboard APIs only; no OAuth, email, payment, SMS, or external provider leg was required.
+
+## Decisions for a Human
+
+None.
+
+## Learnings
+
+- Projection/export code must tolerate historical or hand-authored grader score records that omit optional arrays. Treat missing optional evidence as empty evidence rather than crashing export.
+- The lazy transcript boundary is doing useful work: list/detail payloads remain small and metadata-oriented, while transcript body content is fetched only after the user opens the Transcript tab.
+- Raw provider logs stay safe as separate evidence under `outputs/raw/provider.log`; they are not canonical transcripts and should not be reinterpreted as source logs on parsed result rows.
+
+## Final Status
+
+Pass after fix. The integrated results/artifacts/transcript stack is ready for review from this dogfood pass.
+
+Functional failure fixed locally: `b25b0475`.
+
+Human-decision blockers: none.
diff --git a/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md
new file mode 100644
index 000000000..e5dcf346b
--- /dev/null
+++ b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md
@@ -0,0 +1,888 @@
+---
+title: "feat: Specify git-native results storage, retention, and oplog"
+type: feat
+date: 2026-06-21
+bead: av-quf
+base: av-vwa.16.10
+---
+
+# feat: Specify git-native results storage, retention, and oplog
+
+## Summary
+
+Bead `av-quf` should turn the current git-backed results implementation into a
+documented storage contract with three backend modes, a single results branch,
+one artifact sidecar namespace, retention and compaction rules, a compact
+publication export, an append-only mutable-operation log, and an S3-compatible
+object-storage tier.
+
+The canonical AgentV run artifacts stay `benchmark.json`, `index.jsonl`, per-test
+grading/timing files, `outputs/trace.json`, and derived transcript artifacts. GitHub,
+Backblaze B2, Phoenix, Hugging Face, and Dashboard are projections, viewers, or storage
+backends over those artifacts.
+
+---
+
+## Problem Frame
+
+`packages/core/src/evaluation/results-repo.ts` already implements the first git-native
+slice: `agentv/results/v1` is the default results branch, `runs/**` is listed with
+`git ls-tree`, `benchmark.json` blobs are read with `git cat-file --batch`, and the
+branch root is a deterministic orphan genesis. Current mutable tags live under
+`metadata/runs/**`, and heavy transcript sidecars are still written inside each run
+workspace by `packages/core/src/evaluation/run-artifacts.ts`.
+
+The next implementation beads need a precise shared contract before they split work.
+The contract must avoid branch proliferation, keep AgentV artifacts canonical, and
+define how git, object storage, retention, publication, and mutable operations compose
+without creating another hosted results platform inside AgentV.
+
+---
+
+## Scope Boundaries
+
+### In Scope
+
+- Define storage backend modes and per-mode listing/index strategies.
+- Pin the git-native ref and path layout for `agentv/results/v1`,
+  `agentv/artifacts/v1`, and `agentv/oplog/v1`.
+- Define retention, compaction, and migration rules for run metadata and heavy artifacts.
+- Define compact publication export as a derived artifact over `benchmark.json` and
+  `index.jsonl`, with no required `eval.txt`.
+- Define the mutable operation log and add-wins tag set semantics.
+- Define the Backblaze B2 S3-compatible object tier and secret-loading boundary.
+- Name concrete files, functions, and tests for dependent implementation beads.
+
+### Out of Scope
+
+- Implementing storage backends, S3, oplog, retention, or export code in this bead.
+- Adding GitHub issues or tracker runtime state.
+- Creating windowed branches, per-run branches, or a hosted Dashboard replacement.
+- Making Phoenix, Hugging Face, B2, or GitHub the canonical results model.
+
+### Deferred to Follow-Up Work
+
+- Path sharding under `runs/` or artifact prefixes. Only add it after a benchmark with
+  realistic run counts proves `git ls-tree` or object-store listing is too slow.
+- PR-based publishing for human-reviewed result repositories. Machine-generated eval
+  results should keep direct append commits until a concrete workflow needs review gates.
+- A generic non-B2 object-store provider matrix. Start with S3-compatible configuration
+  narrow enough to support B2 and avoid provider-specific APIs.
+
+---
+
+## Requirements
+
+### Storage Modes
+
+- R1. The default mode remains git-native and must work with the current explicit
+  `repo_path` or `repo_url` results configuration.
+- R2. Hybrid mode must keep the run index and metadata in git while moving selected
+  heavy artifact payloads to object storage.
+- R3. Blob-native mode must store run index, metadata, artifacts, and oplog in object
+  storage without requiring a git checkout, git object database, or git remote.
+- R4. Each mode must define its listing/index strategy: git tree listing for git-backed
+  modes and bucket manifest plus `ListObjectsV2` fallback for blob-native mode.
+
+### Git Layout And Sync
+
+- R5. The primary results ref is `agentv/results/v1`.
+- R6. Heavy artifact sidecars use the single artifact ref or namespace
+  `agentv/artifacts/v1`, with path prefixes such as `transcripts/`, `raw-logs/`,
+  and `screenshots/`.
+- R7. Mutable operations use the single oplog ref or namespace `agentv/oplog/v1`.
+- R8. The git-native branch must keep deterministic orphan genesis and must not create
+  windowed branches or per-run branches.
+- R9. Path sharding is not part of v1 unless measurement at realistic scale proves it is
+  needed.
+
+### Retention And Publication
+
+- R10. Retention must distinguish logical pruning from history compaction in git-backed
+  modes.
+- R11. Hybrid and blob-native modes must support object lifecycle policy alignment for
+  artifact payloads without deleting index metadata prematurely.
+- R12. Transcript migration must support transcripts under
+  `agentv/artifacts/v1` while preserving existing logical artifact references.
+- R13. Publication export must be compact and derived from `benchmark.json` plus
+  `index.jsonl`; it must not require an authored or generated `eval.txt`.
+
+### Mutable Operations
+
+- R14. Mutable run/result operations must start as append-only, per-actor operation records.
+- R15. Tag mutation semantics start as an add-wins tag set, not direct mutation of
+  immutable run artifacts.
+- R16. Oplog storage location must be defined for all three modes.
+
+### Object Storage
+
+- R17. The object-storage tier targets Backblaze B2 through its S3-compatible API.
+- R18. The implementation must use a standard S3 SDK/client, not B2-native APIs.
+- R19. Credentials must come from environment or config populated by the BWS CLI, and
+  resolved secret values must not be written into AgentV artifacts, config examples, or
+  committed docs.
+
+---
+
+## Key Technical Decisions
+
+- KTD1. Backend mode is a storage concern, not a product model. Use `git-native`,
+  `hybrid`, and `blob-native` as storage modes while keeping `benchmark.json` and
+  `index.jsonl` as the artifact contract that readers consume.
+- KTD2. Do not overload the existing `results.mode: github` field. Add
+  `results.storage_mode` with values `git-native`, `hybrid`, and `blob-native`, and
+  normalize missing `storage_mode` to `git-native`. Put object-store settings under
+  `results.object_store`.
+- KTD3. The git tree remains the index for git-backed modes. `listGitRuns()` should
+  continue to list `runs/**/benchmark.json` from `agentv/results/v1`; no separate
+  branch-local `index/runs.jsonl` is introduced.
+- KTD4. Use one artifact sidecar namespace named `artifacts`. Do not introduce
+  `artifact-blobs`, `blobs`, or per-artifact refs. Prefix by artifact class, for example
+  `transcripts/<run-path>/...`, `raw-logs/<run-path>/...`, and
+  `screenshots/<run-path>/...`.
+- KTD5. Use sibling Git refs for results, artifacts, and oplog. A Git ref name cannot
+  also be a directory prefix of another ref, so `agentv/results/v1` cannot coexist with
+  child refs such as `agentv/results/v1/artifacts` or `agentv/results/v1/oplog`.
+- KTD6. Hybrid mode keeps git as the metadata and index authority, while object storage
+  stores selected heavy payload bytes. Git contains stable artifact locator records with
+  checksums, sizes, and logical paths so readers can verify fetched payloads.
+- KTD7. Blob-native mode mirrors the same logical namespaces in the bucket, but does not
+  emulate git refs. It owns bucket manifests and per-prefix object listings.
+- KTD8. Mutable operations are derived overlays. Existing `metadata/runs/**/tags.json`
+  is a compatibility read/write surface until oplog materialization replaces direct
+  overlay writes.
+- KTD9. Publication export is a projection. It should read completed run bundles and
+  emit a compact publishable directory without becoming a new source of truth.
+- KTD10. Backblaze B2 is addressed only through S3-compatible endpoints and Signature V4.
+  The object client should be a standard S3 client configured with endpoint, region,
+  bucket, and credentials.
+
+---
+
+## High-Level Technical Design
+
+### Storage Topology
+
+```mermaid
+flowchart TB
+  Local[Local run workspace .agentv/results/runs] --> Publish[Result publisher]
+  Publish --> GitIndex[agentv/results/v1 runs metadata]
+  Publish --> GitArtifacts[agentv/artifacts/v1 artifact sidecar]
+  Publish --> Oplog[agentv/oplog/v1 mutable ops]
+  Publish --> Bucket[(B2 S3-compatible bucket)]
+
+  GitIndex --> Dashboard[Dashboard and CLI readers]
+  GitArtifacts --> Resolver[Artifact resolver]
+  Bucket --> Resolver
+  Oplog --> Tags[Derived tag set and future mutable views]
+  Resolver --> Dashboard
+
+  GitIndex -. hybrid metadata .-> Bucket
+  Bucket -. blob-native manifest .-> Dashboard
+```
+
+### Mode Matrix
+
+| Mode | Canonical index/listing | Artifact payloads | Mutable ops | Git dependency |
+| --- | --- | --- | --- | --- |
+| `git-native` | `git ls-tree -r agentv/results/v1 -- runs/` plus `git cat-file --batch` for `benchmark.json` | `agentv/artifacts/v1` stores payload bytes | `agentv/oplog/v1` | Required |
+| `hybrid` | Same primary git ref as `git-native` | Object storage stores selected payload bytes; git stores locators under the artifact namespace | `agentv/oplog/v1` | Required for index/oplog |
+| `blob-native` | Bucket manifest under the results namespace, with `ListObjectsV2` fallback by prefix | Object storage stores all payloads | Bucket oplog prefix | None |
+
+### Logical Namespace Shape
+
+```text
+agentv/results/v1
+  runs/<experiment>/<timestamp>/benchmark.json
+  runs/<experiment>/<timestamp>/index.jsonl
+  runs/<experiment>/<timestamp>/<test-artifacts except moved heavy payloads>
+  metadata/runs/<experiment>/<timestamp>/materialized-tags.json
+
+agentv/artifacts/v1
+  transcripts/<experiment>/<timestamp>/<test-key>/transcript.jsonl
+  raw-logs/<experiment>/<timestamp>/<test-key>/<source>.jsonl
+  screenshots/<experiment>/<timestamp>/<test-key>/<name>.png
+
+agentv/oplog/v1
+  actors/<actor-id>/<sequence-or-time>-<nonce>.json
+```
+
+For blob-native mode, these are bucket prefixes rather than git refs. The prefix shape
+should stay recognizable so readers can share resolver logic across modes.
+
+---
+
+## Section Specs
+
+### 1. Storage Backend Abstraction And Modes
+
+**Decision:** Add a narrow storage abstraction around listing, publishing,
+materializing artifacts, resolving artifact bytes, syncing, applying retention, and
+reading oplog entries. Keep existing git helpers as the first adapter rather than
+rewriting all results code at once.
+
+**File-level plan:**
+
+- `packages/core/src/evaluation/results-repo.ts`
+  - Keep `DEFAULT_RESULTS_BRANCH`, deterministic genesis, `listGitRuns()`,
+    `materializeGitRun()`, and `directPushResults()` as the git adapter's core.
+  - Extract or wrap adapter-facing functions instead of renaming them in the first
+    implementation slice.
+- `packages/core/src/evaluation/loaders/config-loader.ts`
+  - Extend `ResultsConfig` and `parseResultsConfig()` with `storage_mode` and
+    `object_store`.
+  - Preserve current `repo_url`, `repo_path`, `branch`, `remote`, `path`, and
+    `sync` behavior for `git-native`.
+- `packages/core/src/projects.ts`
+  - Add matching project-registry YAML and internal fields if Dashboard project
+    bindings can configure hybrid/blob-native storage.
+- New core files, names to finalize during implementation:
+  - `packages/core/src/evaluation/results-storage.ts` for shared interfaces.
+  - `packages/core/src/evaluation/results-git-storage.ts` for the git adapter if
+    extraction from `results-repo.ts` becomes large.
+  - `packages/core/src/evaluation/results-object-storage.ts` for S3-compatible
+    primitives.
+- `apps/cli/src/commands/results/remote.ts`
+  - Route `listMergedResultFiles()`, `getRemoteResultsStatus()`,
+    `ensureRemoteRunAvailable()`, and `maybeAutoExportRunArtifacts()` through the
+    normalized adapter.
+- `apps/cli/src/commands/results/serve.ts`
+  - Route remote run listing, file reads, and tag mutations through storage-resolved
+    metadata rather than assuming a git materialized path exists.
+
+**Per-mode listing/index strategy:**
+
+- `git-native`: list `runs/**/benchmark.json` with `git ls-tree`; batch-read
+  benchmark blobs with `git cat-file --batch`; materialize run details lazily with
+  `materializeGitRun()`.
+- `hybrid`: list from the same git ref and read the same `benchmark.json` blobs.
+  Artifact locators in `index.jsonl` or sidecar manifests decide whether bytes come
+  from git artifacts or object storage.
+- `blob-native`: read a compact run manifest from bucket storage first. If the
+  manifest is missing or stale, fall back to `ListObjectsV2` over
+  `runs/**/benchmark.json`-equivalent objects, rebuild the manifest, and continue.
+  Use continuation tokens because S3 listing returns a bounded page per request.
+
+**Test plan:**
+
+- `packages/core/test/evaluation/results-storage.test.ts`
+  - Normalizes missing storage mode to `git-native`.
+  - Rejects incompatible config combinations, such as `blob-native` with `repo_path`
+    as a hard dependency.
+  - Proves the adapter interface can list runs in all modes from fixtures.
+- `packages/core/test/evaluation/results-repo.test.ts`
+  - Existing git-native tests must keep passing.
+  - Add coverage that `git-native` listing remains one `runs/**/benchmark.json`
+    tree scan, not a committed index file.
+- `apps/cli/test/commands/results/serve.test.ts`
+  - Dashboard `/api/runs` response shape stays stable across adapter-backed sources.
+
+**Acceptance:**
+
+- A dependent implementation bead can add a new storage adapter without changing
+  Dashboard components.
+- Existing `results.repo_path` and `results.repo_url` configs still publish and list
+  runs as `git-native`.
+- Blob-native mode has no code path that shells out to `git`.
+
+### 2. Git-Native Layout
+
+**Decision:** Keep one primary results branch, one artifact sidecar ref, and one oplog
+ref. Do not add windowed or per-run branches. Do not shard paths before measurement.
+
+**File-level plan:**
+
+- `packages/core/src/evaluation/results-repo.ts`
+  - Keep `DEFAULT_RESULTS_BRANCH = 'agentv/results/v1'`.
+  - Add constants for the artifact and oplog refs:
+    `agentv/artifacts/v1` and `agentv/oplog/v1`.
+  - Add a shared test assertion that all three refs pass `git check-ref-format`
+    and no ref is a prefix parent or child of another.
+  - Extend safe-path staging to include only owned top-level paths on each ref.
+  - Keep `createResultsGenesisCommit()` and `createOrphanResultsBranch()` behavior
+    for any new git storage refs so independent clients converge on the same root.
+  - Keep `commitResultsRunWithTemporaryIndex()` for primary run commits.
+  - Add artifact-ref and oplog-ref commit helpers only if sharing the temporary-index
+    machinery remains simple.
+- `apps/cli/src/commands/results/remote.ts`
+  - Keep `getResultsStorageRef()` returning the primary ref for run listing.
+  - Add resolver access to artifact and oplog refs without changing remote run IDs.
+- `packages/core/test/evaluation/results-repo.test.ts`
+  - Add deterministic genesis tests for the artifact and oplog refs if they are
+    created by separate helper functions.
+  - Add tests that two clients publishing to `agentv/artifacts/v1` converge
+    rather than minting divergent orphan roots.
+
+**Layout rules:**
+
+- Primary ref `agentv/results/v1`:
+  - Owns `runs/**` and lightweight materialized metadata.
+  - Lists runs only through `runs/**/benchmark.json`.
+- Artifact ref `agentv/artifacts/v1`:
+  - Owns payload classes under `transcripts/`, `raw-logs/`, and `screenshots/`.
+  - May store payload bytes in `git-native`.
+  - May store locator manifests in `hybrid`.
+- Oplog ref `agentv/oplog/v1`:
+  - Owns append-only operation records under `actors/**`.
+  - Is never used for immutable run payloads.
+
+**Test plan:**
+
+- Unit test constants and normalized default branch.
+- Integration test with a temporary repo that publishes:
+  - one run to `agentv/results/v1`;
+  - one transcript payload to `agentv/artifacts/v1`;
+  - one tag operation to `agentv/oplog/v1`.
+- Assert all three refs can coexist in one temporary repo because none is a
+  path-prefix of another.
+- Assert the source checkout branch does not switch.
+- Assert no `agentv/results/v1/<window>` or `agentv/results/run/<id>` refs are created.
+
+**Acceptance:**
+
+- `git for-each-ref refs/heads/agentv/results refs/heads/agentv/artifacts refs/heads/agentv/oplog`
+  shows only the v1 primary ref and the two named sidecar refs for completed-result storage.
+- Run listing performance is measured against realistic data before any path sharding
+  proposal is accepted.
+
+### 3. Retention, Compaction, And Transcript Migration
+
+**Decision:** Retention removes live references first; compaction is an explicit
+maintenance action because git history and object-store versioning can keep old bytes
+after logical deletion.
+
+**File-level plan:**
+
+- New core file, likely `packages/core/src/evaluation/results-retention.ts`
+  - Evaluate retention policy against normalized run metadata.
+  - Produce a deletion plan for primary run paths, artifact sidecar paths, oplog
+    materializations, and object-store payloads.
+  - Keep policy evaluation pure so git and bucket adapters can execute it.
+- `packages/core/src/evaluation/results-repo.ts`
+  - Add git deletion commits for `runs/**`, `metadata/runs/**`, and artifact-ref
+    prefixes.
+  - Add optional compaction helpers only after logical pruning exists.
+- `packages/core/src/evaluation/run-artifacts.ts`
+  - Preserve logical `transcript_path` values while supporting external artifact
+    locators.
+  - Add optional artifact locator metadata in `index.jsonl` rather than replacing the
+    existing path fields.
+- `apps/cli/src/commands/results/remote.ts`
+  - Teach `ensureRemoteRunAvailable()` and future artifact resolvers to fetch a
+    transcript from `agentv/artifacts/v1` when the run-local path is a logical
+    reference.
+- `apps/cli/src/commands/results/serve.ts`
+  - Keep file API responses stable for transcript JSONL, whether bytes are local,
+    materialized from git, or streamed from object storage.
+
+**Git/hybrid retention rules:**
+
+- Logical prune commit:
+  - Removes selected `runs/<experiment>/<timestamp>/**` from `agentv/results/v1`.
+  - Removes selected artifact paths from `agentv/artifacts/v1` or replaces
+    hybrid locator records with tombstones.
+  - Appends retention operations to oplog when mutable state is affected.
+- Compaction:
+  - Explicitly rewrites or re-roots storage refs after a backup/export checkpoint.
+  - Never runs automatically during `agentv eval`.
+  - Requires remote coordination because old commits and blobs can disappear after
+    garbage collection.
+
+**Bucket lifecycle rules:**
+
+- Hybrid:
+  - Keep object payloads at least as long as primary git metadata points to them.
+  - Use object lifecycle for expired payload classes after the git retention plan
+    removes or tombstones their locators.
+- Blob-native:
+  - Bucket lifecycle can expire artifact payload prefixes independently only when the
+    bucket manifest and oplog policy mark them expired.
+  - Keep index manifests longer than payloads when publication or audit needs summary
+    history without large transcripts.
+
+**Transcript migration:**
+
+- Existing runs may have `transcript_path` pointing at
+  `<artifact_dir>/outputs/transcript.jsonl`.
+- Migration copies transcript bytes to
+  `agentv/artifacts/v1:transcripts/<experiment>/<timestamp>/<test-key>/transcript.jsonl`
+  or the matching object-store key.
+- `index.jsonl` keeps `transcript_path` as the logical path and gains optional locator
+  metadata with `backend`, `ref` or bucket namespace, `path`, `sha256`, and
+  `size_bytes`.
+- Readers resolve the logical path through locator metadata first and fall back to the
+  run-local file for historical bundles.
+
+**Test plan:**
+
+- `packages/core/test/evaluation/results-retention.test.ts`
+  - Selects old runs by timestamp and keeps protected latest runs.
+  - Plans transcript sidecar deletion only after primary metadata no longer points to it.
+  - Produces separate plans for git-native, hybrid, and blob-native modes.
+- `packages/core/test/evaluation/run-artifacts.test.ts`
+  - Verifies optional artifact locator fields are snake_case and do not break
+    `parseJsonlResults()`.
+- `apps/cli/test/commands/results/serve.test.ts`
+  - Serves a transcript from sidecar/object locator with the same raw/download
+    behavior as a run-local transcript.
+
+**Acceptance:**
+
+- Retention can remove old live runs without breaking listing for retained runs.
+- A transcript migrated under `agentv/artifacts/v1` remains viewable through
+  the existing Dashboard file API.
+- Compaction cannot run implicitly as a side effect of publish, sync, or Dashboard
+  polling.
+
+### 4. Compact Derived Publication Export
+
+**Decision:** Publication output is a derived export over the canonical run bundle.
+It does not require an `eval.txt` artifact, and it does not become the source of truth
+for rerun, comparison, grading, or adapter ingestion.
+
+**File-level plan:**
+
+- `apps/cli/src/commands/results/export.ts`
+  - Keep the current run-workspace export path aligned with
+    `writeArtifactsFromResults()`.
+  - Add or route to a publication export mode only if the CLI surface stays narrow.
+- New CLI/core files if a separate command reads cleaner:
+  - `apps/cli/src/commands/results/publication.ts`
+  - `packages/core/src/evaluation/results-publication.ts`
+- `packages/core/src/evaluation/run-artifacts.ts`
+  - Remains the source for `benchmark.json`, `index.jsonl`, and per-test artifact
+    schemas.
+- `apps/web/src/content/docs/docs/tools/results.mdx`
+  - Document that publication export reads completed run artifacts and does not
+    require `eval.txt`.
+
+**Publication contract:**
+
+- Inputs:
+  - completed run workspace;
+  - `index.jsonl` manifest;
+  - `benchmark.json`;
+  - optional sidecar-resolved artifact references for selected public payloads.
+- Outputs:
+  - compact `benchmark.json` and `index.jsonl` or a derived `publication.json`;
+  - optional static assets for selected summaries;
+  - no required `eval.txt`.
+- Privacy:
+  - Default export excludes raw prompts, tool args/results, transcripts, screenshots,
+    and raw logs unless the user opts into a payload class.
+
+**Test plan:**
+
+- `apps/cli/test/commands/results/export.test.ts`
+  - Publication export succeeds with only `benchmark.json` and `index.jsonl`.
+  - Publication export fails clearly when the manifest is not an AgentV result row.
+  - Payload opt-in includes only selected sidecar files.
+- `apps/cli/test/commands/results/report.test.ts`
+  - Existing single-run HTML report remains unaffected.
+
+**Acceptance:**
+
+- A publication artifact can be generated from a run bundle that has no `eval.txt`.
+- The exported publication states or embeds enough summary data for readers without
+  replacing the canonical run bundle.
+- External viewers consume publication output as a projection, not as an AgentV run
+  workspace.
+
+### 5. Mutable Run/Result Operations Via Append-Only Oplog
+
+**Decision:** Implement mutable operations as per-actor append-only operation records.
+Tags are the first materialized view and use add-wins set semantics.
+
+**File-level plan:**
+
+- `apps/cli/src/commands/results/remote-metadata.ts`
+  - Preserve current `metadata/runs/**/tags.json` behavior as a compatibility layer.
+  - Add read/write paths that append oplog operations before or instead of writing
+    materialized overlays.
+- New core file, likely `packages/core/src/evaluation/results-oplog.ts`
+  - Define operation wire records with snake_case fields.
+  - Define actor id, sequence/nonce, operation id, target run id, operation kind,
+    payload, created timestamp, and optional causal metadata.
+  - Implement add-wins tag projection.
+- `packages/core/src/evaluation/results-repo.ts`
+  - Add git append helpers for `agentv/oplog/v1`.
+- `apps/cli/src/commands/results/serve.ts`
+  - Route tag set, clear, and read endpoints through oplog projection for remote
+    runs once the adapter is available.
+- `apps/dashboard/src/lib/run-list-actions.ts` and tag-related component tests
+  - Keep UI semantics stable: tags remain free-form chips with existing limits.
+
+**Operation shape:**
+
+```yaml
+schema_version: agentv.oplog.v1
+op_id: actor-a/2026-06-21T10-00-00-000Z-01hx
+actor_id: actor-a
+created_at: "2026-06-21T10:00:00.000Z"
+target:
+  run_id: with-skills::2026-06-17T10-00-00-000Z
+kind: tag_add
+payload:
+  tag: release-candidate
+```
+
+For tag projection, removals record `tag_remove` with the tag value. A concurrent add
+and remove resolve to the tag being present when the add operation is not causally
+observed by the remove. That is the add-wins rule and prevents a stale clear from
+deleting another actor's later tag addition.
+
+**Where oplog lives by mode:**
+
+- `git-native`: `agentv/oplog/v1` git ref, under
+  `actors/<actor-id>/<sequence-or-time>-<nonce>.json`.
+- `hybrid`: same git oplog ref, because git remains the metadata authority.
+- `blob-native`: object-store prefix
+  `oplog/actors/<actor-id>/<sequence-or-time>-<nonce>.json`, with a bucket manifest
+  for efficient projection rebuilds.
+
+**Test plan:**
+
+- `packages/core/test/evaluation/results-oplog.test.ts`
+  - Projects add-wins tags from add/remove operations.
+  - Handles duplicate op ids idempotently.
+  - Keeps operations from different actors without content conflicts.
+  - Rejects non-snake_case or malformed operation records.
+- `apps/cli/test/commands/results/remote-metadata.test.ts`
+  - Existing overlay tests keep passing.
+  - New oplog-backed tag write produces the same returned `RemoteRunTagState`.
+- `apps/cli/test/commands/results/serve.test.ts`
+  - Tag API returns effective tags after concurrent actor operations.
+
+**Acceptance:**
+
+- A remote tag edit appends an operation and does not rewrite immutable run artifacts.
+- Concurrent tag adds from two actors both appear in the materialized tag set.
+- Blob-native tag edits work without git.
+
+### 6. Object-Storage Tier: Backblaze B2 Through S3-Compatible API
+
+**Decision:** Use Backblaze B2 only through the S3-compatible API with a standard S3
+client. The B2 Native API is out of scope for this storage tier.
+
+**File-level plan:**
+
+- `packages/core/package.json`
+  - Add `@aws-sdk/client-s3` as a direct dependency if object storage code lands in
+    core. Do not rely on transitive dependencies from provider packages.
+- `packages/core/src/evaluation/results-object-storage.ts`
+  - Create the S3-compatible client from endpoint, region, bucket, prefix, and
+    environment-provided credentials.
+  - Implement `put`, `get`, `head`, `delete`, multipart threshold decisions, and
+    paginated listing.
+  - Use `ListObjectsV2` continuation tokens for listing.
+- `packages/core/src/evaluation/loaders/config-loader.ts`
+  - Parse object-store config with snake_case fields:
+
+```yaml
+results:
+  storage_mode: hybrid
+  repo_path: .
+  object_store:
+    provider: s3-compatible
+    endpoint: ${AGENTV_RESULTS_S3_ENDPOINT}
+    region: ${AGENTV_RESULTS_S3_REGION}
+    bucket: ${AGENTV_RESULTS_S3_BUCKET}
+    prefix: agentv/results/v1
+```
+
+- `packages/core/src/evaluation/hooks.ts`
+  - Reuse existing `before_session` secret-loading support where possible. A project
+    can run BWS before AgentV commands and inject `AGENTV_RESULTS_S3_*` variables.
+- `apps/web/src/content/docs/docs/tools/dashboard.mdx` and
+  `apps/web/src/content/docs/docs/tools/results.mdx`
+  - Document that BWS is a local/CI secret source and resolved values must not be
+    committed.
+
+**B2 specifics:**
+
+- Endpoint format is `https://s3.<region>.backblazeb2.com`.
+- Authentication uses S3 Signature V4.
+- Application key id maps to S3 access key id; application key maps to S3 secret key.
+- Configure standard S3 endpoint override, region, and credentials. Do not call B2
+  Native API endpoints.
+
+**BWS secret boundary:**
+
+- Recommended local/CI flow:
+  - BWS authenticates with `BWS_ACCESS_TOKEN`.
+  - BWS injects or exports the S3 endpoint, region, bucket, access key id, and secret
+    access key into environment variables before AgentV runs.
+  - AgentV config interpolates variable names or reads environment variables directly.
+- Never persist resolved BWS values into `benchmark.json`, `index.jsonl`, oplog records,
+  Dashboard responses, docs examples, or project registry files.
+
+**Test plan:**
+
+- `packages/core/test/evaluation/results-object-storage.test.ts`
+  - Uses a fake S3 client or local test double to verify `PutObject`, `GetObject`,
+    `HeadObject`, `DeleteObject`, and paginated `ListObjectsV2` behavior.
+  - Verifies credentials are read from env and are not serialized into manifests.
+  - Verifies B2 endpoint config is passed as an S3 endpoint override.
+- `packages/core/test/evaluation/loaders/config-loader.test.ts`
+  - Parses object-store config and rejects missing bucket/endpoint for hybrid or
+    blob-native modes.
+- `apps/cli/test/commands/results/serve.test.ts`
+  - Streams a sidecar artifact from object storage through the existing file API.
+
+**Acceptance:**
+
+- Hybrid mode can write a transcript payload to B2 through the S3-compatible client
+  while listing the run from git.
+- Blob-native mode can list runs from bucket metadata without invoking git.
+- No code imports a B2-native SDK or calls B2-native API-specific operations.
+- No test fixture or docs example contains resolved secret values.
+
+---
+
+## Implementation Units
+
+### U1. Results Storage Config And Adapter Boundary
+
+- **Goal:** Add the storage-mode config and adapter interface that later units can use.
+- **Requirements:** R1, R2, R3, R4, R19
+- **Dependencies:** None
+- **Files:** `packages/core/src/evaluation/loaders/config-loader.ts`,
+  `packages/core/src/projects.ts`, `packages/core/src/evaluation/results-storage.ts`,
+  `packages/core/test/evaluation/loaders/config-loader.test.ts`,
+  `packages/core/test/projects.test.ts`,
+  `packages/core/test/evaluation/results-storage.test.ts`
+- **Approach:** Introduce storage mode without overloading `results.mode: github`.
+  Normalize missing `storage_mode` to `git-native`, keep current git fields valid, and
+  define adapter methods for listing, publishing, materializing, artifact reads,
+  oplog reads, and retention.
+- **Patterns to follow:** `normalizeResultsConfig()` in
+  `packages/core/src/evaluation/results-repo.ts`; `fromYaml()` and `toYaml()` in
+  `packages/core/src/projects.ts`; snake_case boundary rules in `.agents/conventions.md`.
+- **Test scenarios:**
+  - Given current `repo_path: .` config with no storage mode, normalization returns
+    `git-native`.
+  - Given `storage_mode: hybrid`, parser requires valid git configuration and
+    `object_store`.
+  - Given `storage_mode: blob-native`, parser accepts `object_store` without
+    `repo_path` or `repo_url`.
+  - Given `blob-native` config with no object store, parser rejects it with a clear
+    warning.
+  - Given project registry results config with object-store fields, YAML load/save
+    preserves snake_case on disk and camelCase internally.
+  - Given legacy `mode: github`, git-native config still works and does not imply
+    GitHub-only storage.
+- **Verification:** Existing git-native publish/list tests still compile against the
+  normalized config, and new mode tests do not require real network access.
+
+### U2. Git Refs, Sidecar Constants, And Artifact Locator Support
+
+- **Goal:** Pin the three git refs and add resolver support for sidecar artifacts.
+- **Requirements:** R5, R6, R7, R8, R9, R12
+- **Dependencies:** U1
+- **Files:** `packages/core/src/evaluation/results-repo.ts`,
+  `packages/core/src/evaluation/run-artifacts.ts`,
+  `apps/cli/src/commands/results/remote.ts`,
+  `apps/cli/src/commands/results/serve.ts`,
+  `packages/core/test/evaluation/results-repo.test.ts`,
+  `packages/core/test/evaluation/run-artifacts.test.ts`,
+  `apps/cli/test/commands/results/serve.test.ts`
+- **Approach:** Keep `agentv/results/v1` as the listable run ref. Add named constants
+  for artifact and oplog refs. Add optional artifact locator metadata while preserving
+  existing logical path fields such as `transcript_path`.
+- **Patterns to follow:** Current deterministic genesis functions in `results-repo.ts`;
+  `buildIndexArtifactEntry()` and `buildResultIndexArtifact()` in `run-artifacts.ts`;
+  existing transcript file API tests in `serve.test.ts`.
+- **Test scenarios:**
+  - Given a run with a sidecar transcript locator, Dashboard raw file endpoint returns
+    the same text/plain response as a local transcript file.
+  - Given no sidecar locator, historical run-local `transcript_path` still resolves.
+  - Given two clients create an artifact ref, the genesis commit is deterministic.
+  - Given a publish, no per-run or windowed result refs are created.
+- **Verification:** `listGitRuns()` output is unchanged for runs that do not use sidecar
+  payloads.
+
+### U3. Retention And Compaction Planner
+
+- **Goal:** Add retention planning that can prune runs and sidecars without implicit
+  history compaction.
+- **Requirements:** R10, R11, R12
+- **Dependencies:** U1, U2
+- **Files:** `packages/core/src/evaluation/results-retention.ts`,
+  `packages/core/src/evaluation/results-repo.ts`,
+  `packages/core/src/evaluation/results-object-storage.ts`,
+  `packages/core/test/evaluation/results-retention.test.ts`,
+  `packages/core/test/evaluation/results-repo.test.ts`
+- **Approach:** Build a pure planner first. Execution adapters take the plan and create
+  git deletion commits or bucket deletion batches. Keep compaction as a separate
+  explicit operation with stronger confirmation and documentation.
+- **Patterns to follow:** Safe path filters in `isSafeResultsRepoPath()` and
+  `existingTrackedResultsDirs()`; project sync's blocked status reporting.
+- **Test scenarios:**
+  - Given runs older than a retention threshold, planner selects primary run paths and
+    sidecar paths for deletion.
+  - Given a sidecar transcript still referenced by a retained run, planner keeps it.
+  - Given object lifecycle policy shorter than metadata retention, planner reports the
+    mismatch instead of approving deletion.
+  - Given compaction is not requested, no history rewrite operation is emitted.
+- **Verification:** Retention execution can be tested against a temporary git repo and a
+  fake object store without touching real remotes.
+
+### U4. Publication Export Projection
+
+- **Goal:** Add the compact publication export without requiring `eval.txt`.
+- **Requirements:** R13
+- **Dependencies:** U1
+- **Files:** `apps/cli/src/commands/results/export.ts`,
+  `apps/cli/src/commands/results/index.ts`,
+  `packages/core/src/evaluation/results-publication.ts`,
+  `apps/web/src/content/docs/docs/tools/results.mdx`,
+  `apps/cli/test/commands/results/export.test.ts`
+- **Approach:** Keep publication export read-only over completed run artifacts. Use
+  `parseJsonlResults()` and `benchmark.json` metadata as inputs. If a new command is
+  clearer than another export option, keep it under `agentv results` but document it as
+  projection-only.
+- **Patterns to follow:** `loadExportSource()` and `deriveOutputDir()` in
+  `apps/cli/src/commands/results/export.ts`; `results report` docs for static output
+  framing.
+- **Test scenarios:**
+  - Given a run with `index.jsonl` and `benchmark.json`, publication export succeeds
+    with no `eval.txt`.
+  - Given an invalid JSONL input that is not an AgentV result row, publication export
+    fails with the existing result-row schema guidance.
+  - Given transcript payloads exist, publication export excludes them by default.
+  - Given payload opt-in for transcripts, publication export includes only selected
+    sidecar-resolved transcript files.
+- **Verification:** The generated publication output can be inspected from disk and does
+  not modify the source run workspace.
+
+### U5. Oplog And Add-Wins Tag Projection
+
+- **Goal:** Replace direct mutable metadata writes with append-only operations and a tag
+  projection.
+- **Requirements:** R14, R15, R16
+- **Dependencies:** U1, U2
+- **Files:** `packages/core/src/evaluation/results-oplog.ts`,
+  `packages/core/src/evaluation/results-repo.ts`,
+  `apps/cli/src/commands/results/remote-metadata.ts`,
+  `apps/cli/src/commands/results/serve.ts`,
+  `apps/dashboard/src/lib/run-list-actions.ts`,
+  `packages/core/test/evaluation/results-oplog.test.ts`,
+  `apps/cli/test/commands/results/remote-metadata.test.ts`,
+  `apps/cli/test/commands/results/serve.test.ts`,
+  `apps/dashboard/src/lib/run-list-actions.test.ts`
+- **Approach:** Append `tag_add` and `tag_remove` operations per actor, materialize the
+  effective tag set for read performance, and keep current Dashboard tag UX stable.
+- **Patterns to follow:** Current `RemoteRunTagState` shape and `metadata/runs/**`
+  overlay path handling in `remote-metadata.ts`.
+- **Test scenarios:**
+  - Given two actors add different tags concurrently, both tags are visible.
+  - Given one actor clears tags while another later adds a tag, the later add wins.
+  - Given duplicate operation ids, projection is idempotent.
+  - Given malformed operation JSON, projection reports a warning and skips the record.
+  - Given blob-native mode, tag operations are stored under bucket oplog prefix and no
+    git command runs.
+- **Verification:** Dashboard tag endpoints return the same response shape as today.
+
+### U6. S3-Compatible Object Store And B2 Integration
+
+- **Goal:** Add the object-store tier used by hybrid and blob-native modes.
+- **Requirements:** R2, R3, R4, R11, R17, R18, R19
+- **Dependencies:** U1
+- **Files:** `packages/core/package.json`, `bun.lock`,
+  `packages/core/src/evaluation/results-object-storage.ts`,
+  `packages/core/src/evaluation/loaders/config-loader.ts`,
+  `packages/core/test/evaluation/results-object-storage.test.ts`,
+  `packages/core/test/evaluation/loaders/config-loader.test.ts`,
+  `apps/web/src/content/docs/docs/tools/dashboard.mdx`,
+  `apps/web/src/content/docs/docs/tools/results.mdx`
+- **Approach:** Add a standard S3 client wrapper with endpoint override support. Keep
+  B2-specific knowledge in docs/config examples and endpoint validation, not in a
+  B2-native SDK layer.
+- **Patterns to follow:** Existing env interpolation in config loader; `hooks.before_session`
+  parsing in `packages/core/src/evaluation/hooks.ts`; secret-redaction posture in
+  task-bundle tests.
+- **Test scenarios:**
+  - Given B2-style endpoint, region, bucket, and env credentials, object client is
+    configured as S3-compatible.
+  - Given a paginated object listing, all pages are read using continuation tokens.
+  - Given missing credentials, error message names variables but not values.
+  - Given a sidecar upload, the stored locator includes checksum and size but no secret.
+  - Given blob-native listing, run manifests load from bucket without git.
+- **Verification:** Unit tests use a fake S3 client; no real B2 bucket is needed for CI.
+
+---
+
+## System-Wide Impact
+
+- **Core:** `results-repo.ts` stops being the only remote-results boundary and becomes
+  the git adapter or is wrapped by one.
+- **CLI:** `results export`, auto-publish, and Dashboard server routes need adapter
+  routing but should preserve existing user-facing response shapes.
+- **Dashboard:** The UI should not learn storage-specific concepts. It consumes the same
+  run list, file, and tag API responses.
+- **Docs:** Results and Dashboard docs need updated wording because current docs still
+  imply only git-backed remote results and mention committed `.agentv/results/**` paths
+  in places that now flatten on-branch to `runs/**`.
+- **Secrets:** Object-store credentials must stay in environment or local secret-loading
+  flows. The implementation must not serialize them into artifacts or Dashboard JSON.
+
+---
+
+## Risks And Mitigations
+
+| Risk | Mitigation |
+| --- | --- |
+| Storage abstraction balloons beyond current need | Keep interface methods tied to existing results operations: list, publish, materialize/read artifact, sync, retention, oplog. |
+| Hybrid locators break old readers | Keep existing logical path fields and add optional locator metadata. Old bundles keep local files; new readers prefer locators. |
+| Git compaction surprises collaborators | Make compaction explicit and separate from retention. Document backup and remote coordination requirements before implementation. |
+| Blob-native listing becomes expensive | Use a bucket manifest as the fast path and `ListObjectsV2` as a rebuild/fallback path. Add sharding only after measurement. |
+| Secrets leak through config or artifacts | Use env interpolation and BWS injection only; tests assert secret values are absent from manifests, docs fixtures, and errors. |
+| B2 differences from AWS S3 leak into core | Use standard S3 client operations and endpoint override. Keep B2-specific docs limited to endpoint/credential mapping. |
+
+---
+
+## Acceptance Checklist
+
+- [ ] Spec includes one section each for storage modes, git-native layout,
+  retention/compaction, publication export, oplog, and object storage.
+- [ ] All refs are pinned exactly: `agentv/results/v1`, `agentv/artifacts/v1`,
+  and `agentv/oplog/v1`.
+- [ ] Shared ref tests assert the three refs are valid Git refnames and cannot
+  prefix-conflict.
+- [ ] The artifact sidecar is called `artifacts`, not `artifact-blobs` or `blob`.
+- [ ] The plan has no windowed or per-run branches.
+- [ ] Path sharding is deferred until realistic measurement proves need.
+- [ ] AgentV artifacts remain canonical; Dashboard, Hugging Face, Phoenix, B2, and
+  GitHub are projections/viewers/storage backends.
+- [ ] File/function-level implementation guidance names current result repo, remote,
+  serve, export, artifact-writer, and Dashboard surfaces.
+- [ ] Test plan covers core, CLI, Dashboard, and docs-facing behavior.
+- [ ] Dependent beads `av-dcs`, `av-kxa`, `av-8un`, `av-dsc`, and `av-thr` can pick
+  up scoped implementation units without inventing storage decisions.
+
+---
+
+## Sources And Research
+
+- `docs/plans/git-native-results.md` for the current git-tree-as-index contract.
+- `docs/plans/results-branch-layout.md` for flattened `runs/` and `metadata/runs/`
+  layout.
+- `packages/core/src/evaluation/results-repo.ts` for deterministic genesis,
+  `directPushResults()`, `listGitRuns()`, and `materializeGitRun()`.
+- `packages/core/src/evaluation/run-artifacts.ts` and
+  `apps/cli/src/commands/eval/artifact-writer.ts` for `benchmark.json`,
+  `index.jsonl`, `outputs/trace.json`, and transcript sidecars.
+- `apps/cli/src/commands/results/remote.ts`,
+  `apps/cli/src/commands/results/remote-metadata.ts`,
+  `apps/cli/src/commands/results/serve.ts`, and
+  `apps/cli/src/commands/results/export.ts` for current CLI/Dashboard remote,
+  metadata, serving, and export behavior.
+- `docs/adr/2026-06-18-opik-post-run-export-boundary.md` for the adapter boundary
+  that keeps AgentV run bundles canonical.
+- Backblaze B2 S3-compatible docs:
+  `https://www.backblaze.com/docs/cloud-storage-call-the-s3-compatible-api` and
+  `https://www.backblaze.com/apidocs/introduction-to-the-s3-compatible-api`.
+- AWS S3 `ListObjectsV2` docs:
+  `https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html`.
+- Bitwarden Secrets Manager CLI docs:
+  `https://bitwarden.com/help/secrets-manager-cli/`.
diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts
index 41ee9610e..213940190 100644
--- a/packages/core/src/evaluation/orchestrator.ts
+++ b/packages/core/src/evaluation/orchestrator.ts
@@ -129,6 +129,21 @@ function usesFileReferencePrompt(provider: Provider): boolean {
   return isAgentProvider(provider) || provider.kind === 'cli';
 }
 
+function extractProviderRawLogPath(response: ProviderResponse): string | undefined {
+  const raw = response.raw; // provider payload; only a plain object can carry logFile
+  if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
+    return undefined; // missing, primitive, or array payload: no log path
+  }
+
+  const logFile = (raw as Record<string, unknown>).logFile; // type-checked just below
+  if (typeof logFile !== 'string') {
+    return undefined; // logFile absent or not a string
+  }
+
+  const trimmed = logFile.trim();
+  return trimmed.length > 0 ? trimmed : undefined; // whitespace-only counts as absent
+}
+
 interface EvaluationRuntimeOptions {
   readonly target: ResolvedTarget;
   readonly targets?: readonly TargetDefinition[];
@@ -1588,6 +1603,7 @@ async function runBatchEvaluation(options: {
     const tokenUsage = merged?.tokenUsage;
     const startTime = merged?.startTime;
     const endTime = merged?.endTime;
+    const rawProviderLogPath = extractProviderRawLogPath(providerResponse);
 
     // Extract candidate from last assistant message in output
     const candidate = extractLastAssistantContent(output);
@@ -1615,6 +1631,7 @@ async function runBatchEvaluation(options: {
         tokenUsage,
         startTime,
         endTime,
+        rawProviderLogPath,
         targetResolver,
         availableTargets,
         verbose,
@@ -1982,6 +1999,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
   const tokenUsage = merged?.tokenUsage;
   const startTime = merged?.startTime;
   const endTime = merged?.endTime;
+  const rawProviderLogPath = extractProviderRawLogPath(providerResponse);
 
   // Extract candidate from last assistant message in output
   const candidate = extractLastAssistantContent(output);
@@ -2104,6 +2122,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise<Evaluati
       tokenUsage,
       startTime,
       endTime,
+      rawProviderLogPath,
       targetResolver,
       availableTargets,
       fileChanges,
@@ -2375,6 +2394,7 @@ async function evaluateCandidate(options: {
   readonly tokenUsage?: TokenUsage;
   readonly startTime?: string;
   readonly endTime?: string;
+  readonly rawProviderLogPath?: string;
   readonly targetResolver?: (name: string) => Provider | undefined;
   readonly availableTargets?: readonly string[];
   readonly fileChanges?: string;
@@ -2404,6 +2424,7 @@ async function evaluateCandidate(options: {
     tokenUsage,
     startTime,
     endTime,
+    rawProviderLogPath,
     targetResolver,
     availableTargets,
     fileChanges,
@@ -2514,6 +2535,7 @@ async function evaluateCandidate(options: {
     output: candidate,
     scores: scores,
     trace: evaluationTrace,
+    rawProviderLogPath,
     fileChanges,
     executionStatus: classifyQualityStatus(score.score, evalThreshold),
   };
diff --git a/packages/core/src/evaluation/result-artifact-contract.ts b/packages/core/src/evaluation/result-artifact-contract.ts
new file mode 100644
index 000000000..a8f122bf5
--- /dev/null
+++ b/packages/core/src/evaluation/result-artifact-contract.ts
@@ -0,0 +1,108 @@
+/**
+ * AgentV-owned result artifact contract.
+ *
+ * This module centralizes the git refs and portable pointer shapes used by run
+ * records. Local run workspaces still write their files under the existing
+ * per-result artifact directories; these pointers describe where those same
+ * AgentV-owned artifacts belong when projected to a results ref, sidecar ref,
+ * or object store.
+ */
+
+export const AGENTV_RESULTS_PRIMARY_REF = 'agentv/results/v1' as const;
+export const AGENTV_RESULTS_ARTIFACTS_REF = 'agentv/artifacts/v1' as const;
+export const AGENTV_RESULTS_OPLOG_REF = 'agentv/oplog/v1' as const;
+
+export const AGENTV_RESULTS_REFS = {
+  primary: AGENTV_RESULTS_PRIMARY_REF,
+  artifacts: AGENTV_RESULTS_ARTIFACTS_REF,
+  oplog: AGENTV_RESULTS_OPLOG_REF,
+} as const;
+
+export const CANONICAL_TRACE_ARTIFACT_PATH = 'outputs/trace.json' as const;
+export const CANONICAL_TRANSCRIPT_ARTIFACT_PATH = 'outputs/transcript.jsonl' as const;
+
+export const TRANSCRIPT_SCHEMA_VERSION = 'agentv.transcript.v1' as const;
+export const TRANSCRIPT_JSONL_MEDIA_TYPE = 'application/x-ndjson' as const;
+export const TRACE_JSON_MEDIA_TYPE = 'application/vnd.agentv.trace.v1+json' as const;
+
+export type AgentVResultsRefName = (typeof AGENTV_RESULTS_REFS)[keyof typeof AGENTV_RESULTS_REFS];
+
+export type ResultArtifactFamily =
+  | 'traces'
+  | 'transcripts'
+  | 'outputs'
+  | 'raw-logs'
+  | 'screenshots';
+
+export interface ResultArtifactPointer { // camelCase form of an artifact pointer
+  readonly ref: AgentVResultsRefName | string; // any string allowed, not only pinned refs
+  readonly key: string;
+  readonly objectVersion: string;
+  readonly path: string;
+  readonly sha256: string;
+  readonly size: number; // presumably a byte count — TODO confirm
+  readonly schemaVersion: string;
+  readonly mediaType: string;
+  readonly family?: ResultArtifactFamily; // optional artifact family
+}
+
+export interface ResultArtifactPointerWire { // snake_case form of an artifact pointer
+  readonly ref: AgentVResultsRefName | string; // any string allowed, not only pinned refs
+  readonly key: string;
+  readonly object_version: string;
+  readonly path: string;
+  readonly sha256: string;
+  readonly size: number; // presumably a byte count — TODO confirm
+  readonly schema_version: string;
+  readonly media_type: string;
+  readonly family?: ResultArtifactFamily; // optional artifact family
+}
+
+export type TranscriptArtifactPointer = ResultArtifactPointer & {
+  readonly schemaVersion: typeof TRANSCRIPT_SCHEMA_VERSION;
+  readonly mediaType: typeof TRANSCRIPT_JSONL_MEDIA_TYPE;
+  readonly family: 'transcripts';
+};
+
+export type TranscriptArtifactPointerWire = ResultArtifactPointerWire & {
+  readonly schema_version: typeof TRANSCRIPT_SCHEMA_VERSION;
+  readonly media_type: typeof TRANSCRIPT_JSONL_MEDIA_TYPE;
+  readonly family: 'transcripts';
+};
+
+export interface ResultArtifactPointersWire {
+  readonly trace?: ResultArtifactPointerWire;
+  readonly transcript?: TranscriptArtifactPointerWire;
+}
+
+export function toResultArtifactPointerWire(
+  pointer: ResultArtifactPointer, // camelCase pointer to convert
+): ResultArtifactPointerWire {
+  return { // field-for-field copy; only the key casing changes
+    ref: pointer.ref,
+    key: pointer.key,
+    object_version: pointer.objectVersion,
+    path: pointer.path,
+    sha256: pointer.sha256,
+    size: pointer.size,
+    schema_version: pointer.schemaVersion,
+    media_type: pointer.mediaType,
+    family: pointer.family, // optional; stays undefined when the pointer has none
+  };
+}
+
+export function fromResultArtifactPointerWire(
+  pointer: ResultArtifactPointerWire, // snake_case pointer to convert
+): ResultArtifactPointer {
+  return { // field-for-field copy; only the key casing changes
+    ref: pointer.ref,
+    key: pointer.key,
+    objectVersion: pointer.object_version,
+    path: pointer.path,
+    sha256: pointer.sha256,
+    size: pointer.size,
+    schemaVersion: pointer.schema_version,
+    mediaType: pointer.media_type,
+    family: pointer.family, // optional; stays undefined when the pointer has none
+  };
+}
diff --git a/packages/core/src/evaluation/result-row-schema.ts b/packages/core/src/evaluation/result-row-schema.ts
index fff7ef0e0..64a82e3e0 100644
--- a/packages/core/src/evaluation/result-row-schema.ts
+++ b/packages/core/src/evaluation/result-row-schema.ts
@@ -34,6 +34,7 @@ const RESULT_ROW_ALIASES = {
   gradingPath: 'grading_path',
   inputPath: 'input_path',
   outputPath: 'output_path',
+  rawProviderLogPath: 'raw_provider_log_path',
   responsePath: 'response_path',
   startTime: 'start_time',
   targetsPath: 'targets_path',
@@ -45,6 +46,10 @@ const RESULT_ROW_ALIASES = {
   workspacePath: 'workspace_path',
 } as const;
 
+const NEW_SNAKE_CASE_ONLY_FIELDS = {
+  artifactPointers: 'artifact_pointers',
+} as const;
+
 const TRACE_SUMMARY_ALIASES = {
   costUsd: 'cost_usd',
   durationMs: 'duration_ms',
@@ -149,6 +154,19 @@ function buildInvalidScoreError(context: {
   return new ResultRowSchemaError(`Missing or invalid score in result row${location}.`);
 }
 
+function buildSnakeCaseOnlyFieldError(
+  field: keyof typeof NEW_SNAKE_CASE_ONLY_FIELDS, // rejected camelCase key found on the row
+  context: { lineNumber?: number; sourceLabel?: string }, // optional location hints
+): ResultRowSchemaError {
+  const location = [ // optional " in <source>" and " at line <n>" suffixes
+    context.sourceLabel ? ` in ${context.sourceLabel}` : '',
+    context.lineNumber !== undefined ? ` at line ${context.lineNumber}` : '',
+  ].join(''); // empty string when neither hint is available
+  return new ResultRowSchemaError(
+    `Unsupported camelCase result row field "${field}"${location}. Use "${NEW_SNAKE_CASE_ONLY_FIELDS[field]}".`,
+  );
+}
+
 function looksLikeResultRow(value: Record<string, unknown>): boolean {
   return (
     typeof value.test_id === 'string' ||
@@ -169,6 +187,14 @@ export function normalizeResultRow(
     throw buildSchemaError(context);
   }
 
+  for (const field of Object.keys(
+    NEW_SNAKE_CASE_ONLY_FIELDS,
+  ) as (keyof typeof NEW_SNAKE_CASE_ONLY_FIELDS)[]) {
+    if (Object.hasOwn(value, field)) {
+      throw buildSnakeCaseOnlyFieldError(field, context);
+    }
+  }
+
   const normalized = normalizeKnownAliases(value, RESULT_ROW_ALIASES);
   if (normalized.trace !== undefined) {
     normalized.trace = normalizeTraceSummary(normalized.trace);
diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts
index bbe511487..11f7f5bd2 100644
--- a/packages/core/src/evaluation/results-repo.ts
+++ b/packages/core/src/evaluation/results-repo.ts
@@ -15,6 +15,7 @@ import { promisify } from 'node:util';
 
 import { getAgentvDataDir } from '../paths.js';
 import type { ResultsConfig } from './loaders/config-loader.js';
+import { AGENTV_RESULTS_PRIMARY_REF } from './result-artifact-contract.js';
 
 const execFileAsync = promisify(execFile);
 // Local working-tree run workspace inside the eval repo. Local commands
@@ -32,7 +33,7 @@ const RESULTS_REPO_METADATA_DIR = 'metadata';
 const RESULTS_REPO_TRACKED_DIRS = [RESULTS_REPO_RUNS_DIR, RESULTS_REPO_METADATA_DIR] as const;
 const RESULTS_REPO_COMMIT_EMAIL = 'agentv@results-repo';
 const RESULTS_REPO_COMMIT_NAME = 'AgentV Results';
-export const DEFAULT_RESULTS_BRANCH = 'agentv/results/v1';
+export const DEFAULT_RESULTS_BRANCH = AGENTV_RESULTS_PRIMARY_REF;
 const GIT_EMPTY_TREE = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
 // The results branch is a self-rooted orphan whose first commit is a fixed,
 // byte-identical empty-tree genesis. Pinning the message, identity (see
diff --git a/packages/core/src/evaluation/run-artifacts.ts b/packages/core/src/evaluation/run-artifacts.ts
index c15fb205d..af93cc692 100644
--- a/packages/core/src/evaluation/run-artifacts.ts
+++ b/packages/core/src/evaluation/run-artifacts.ts
@@ -7,7 +7,8 @@
  * snake_case here so every caller produces the same artifacts.
  */
 
-import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { createHash } from 'node:crypto';
+import { copyFile, mkdir, readFile, writeFile } from 'node:fs/promises';
 import path from 'node:path';
 
 import { traceEnvelopeToTranscriptJsonLines } from '../import/types.js';
@@ -22,8 +23,22 @@ import {
 } from './projection-identity.js';
 import type { Message } from './providers/types.js';
 import { extractLastAssistantContent } from './providers/types.js';
+import {
+  AGENTV_RESULTS_ARTIFACTS_REF,
+  CANONICAL_TRACE_ARTIFACT_PATH,
+  CANONICAL_TRANSCRIPT_ARTIFACT_PATH,
+  type ResultArtifactFamily,
+  type ResultArtifactPointerWire,
+  type ResultArtifactPointersWire,
+  TRACE_JSON_MEDIA_TYPE,
+  TRANSCRIPT_JSONL_MEDIA_TYPE,
+  TRANSCRIPT_SCHEMA_VERSION,
+  type TranscriptArtifactPointerWire,
+  toResultArtifactPointerWire,
+} from './result-artifact-contract.js';
 import { normalizeResultRow } from './result-row-schema.js';
 import {
+  EXECUTION_TRACE_SCHEMA_VERSION,
   type TraceEnvelope,
   buildTraceEnvelopeFromEvaluationResult,
   toTraceEnvelopeWire,
@@ -205,6 +220,8 @@ export interface IndexArtifactEntry {
   readonly output_path?: string;
   readonly answer_path?: string;
   readonly transcript_path?: string;
+  readonly artifact_pointers?: ResultArtifactPointersWire;
+  readonly raw_provider_log_path?: string;
   readonly input_path?: string;
   readonly response_path?: string;
   readonly task_dir?: string;
@@ -226,7 +243,12 @@ export type ResultIndexArtifact = IndexArtifactEntry;
 export type AdditionalResultIndexFields = Partial<
   Pick<
     IndexArtifactEntry,
-    'task_dir' | 'eval_path' | 'targets_path' | 'files_path' | 'graders_path'
+    | 'task_dir'
+    | 'eval_path'
+    | 'targets_path'
+    | 'files_path'
+    | 'graders_path'
+    | 'raw_provider_log_path'
   >
 >;
 
@@ -351,7 +373,7 @@ function toIndexScore(score: GraderResult): Record<string, unknown> {
     score: score.score,
     weight: score.weight,
     verdict: score.verdict,
-    assertions: score.assertions.map(toIndexAssertion),
+    assertions: (score.assertions ?? []).map(toIndexAssertion),
     raw_request: score.rawRequest,
     input: score.input,
     target: score.target,
@@ -732,6 +754,26 @@ function resultHasExecutionTraceTranscript(result: EvaluationResult): boolean {
   return result.output.length > 0 || result.trace.messages.length > 0;
 }
 
+function rawProviderLogSourcePath(result: EvaluationResult): string | undefined {
+  // A missing, empty, or whitespace-only path yields undefined.
+  return result.rawProviderLogPath?.trim() || undefined;
+}
+
+function rawProviderLogArtifactPath(outputsDir: string): string {
+  return path.join(outputsDir, 'raw', 'provider.log'); // i.e. <outputsDir>/raw/provider.log
+}
+
+/** Copies a raw provider log to its slot under `outputsDir` and returns the destination path. */
+async function copyRawProviderLogArtifact(sourcePath: string, outputsDir: string): Promise<string> {
+  const target = rawProviderLogArtifactPath(outputsDir);
+  // Skip the copy when the log already sits at its destination.
+  if (path.resolve(sourcePath) !== path.resolve(target)) {
+    await mkdir(path.dirname(target), { recursive: true });
+    await copyFile(sourcePath, target);
+  }
+  return target;
+}
+
 interface TraceEnvelopeSidecarParams {
   readonly result: EvaluationResult;
   readonly outputDir: string;
@@ -751,10 +793,13 @@ function buildTraceEnvelopeSidecar(params: TraceEnvelopeSidecarParams): TraceEnv
     source: { path: RESULT_INDEX_FILENAME },
     capture: { content: 'full', redactionLevel: 'none', redactedFields: [] },
     artifacts: {
-      trace_path: 'outputs/trace.json',
+      trace_path: CANONICAL_TRACE_ARTIFACT_PATH,
       answer_path: params.result.output.length > 0 ? 'outputs/answer.md' : undefined,
       response_path: params.result.output.length > 0 ? 'outputs/response.md' : undefined,
-      transcript_path: hasTranscript ? 'outputs/transcript.jsonl' : undefined,
+      transcript_path: hasTranscript ? CANONICAL_TRANSCRIPT_ARTIFACT_PATH : undefined,
+      raw_provider_log_path: rawProviderLogSourcePath(params.result)
+        ? 'outputs/raw/provider.log'
+        : undefined,
     },
     duplicatePolicy: params.duplicatePolicy,
   });
@@ -772,6 +817,72 @@ async function writeTraceEnvelopeSidecar(
   return envelope;
 }
 
+function buildSidecarArtifactKey(family: ResultArtifactFamily, runRelativePath: string): string {
+  return path.posix.join(family, runRelativePath); // joined with "/" on every platform
+}
+
+async function buildArtifactPointer(params: {
+  readonly filePath: string;
+  readonly runRelativePath: string;
+  readonly family: ResultArtifactFamily;
+  readonly schemaVersion: string;
+  readonly mediaType: string;
+}): Promise<ResultArtifactPointerWire> {
+  const content = await readFile(params.filePath); // raw bytes: no encoding is passed
+  const sha256 = createHash('sha256').update(content).digest('hex'); // hex SHA-256 of those bytes
+  return toResultArtifactPointerWire({
+    ref: AGENTV_RESULTS_ARTIFACTS_REF,
+    key: buildSidecarArtifactKey(params.family, params.runRelativePath),
+    objectVersion: `sha256:${sha256}`, // version string is derived from the content hash
+    path: params.runRelativePath,
+    sha256,
+    size: content.byteLength, // byte length of the file as read
+    schemaVersion: params.schemaVersion,
+    mediaType: params.mediaType,
+    family: params.family,
+  });
+}
+
+/** Builds the `traces`-family artifact pointer for the trace file at `tracePath`. */
+async function buildTracePointer(
+  outputDir: string,
+  tracePath: string,
+): Promise<ResultArtifactPointerWire> {
+  const runRelativePath = toRelativeArtifactPath(outputDir, tracePath);
+  const schemaVersion = EXECUTION_TRACE_SCHEMA_VERSION;
+  const mediaType = TRACE_JSON_MEDIA_TYPE;
+  const filePath = tracePath;
+  const family = 'traces';
+  return buildArtifactPointer({ filePath, runRelativePath, family, schemaVersion, mediaType });
+}
+
+/** Builds the `transcripts`-family artifact pointer for the file at `transcriptPath`. */
+async function buildTranscriptPointer(
+  outputDir: string,
+  transcriptPath: string,
+): Promise<TranscriptArtifactPointerWire> {
+  const runRelativePath = toRelativeArtifactPath(outputDir, transcriptPath);
+  const schemaVersion = TRANSCRIPT_SCHEMA_VERSION;
+  const mediaType = TRANSCRIPT_JSONL_MEDIA_TYPE;
+  const request = { filePath: transcriptPath, runRelativePath, schemaVersion, mediaType };
+  // The shared builder returns the generic pointer type; it is asserted to the transcript type.
+  const pointer = await buildArtifactPointer({ ...request, family: 'transcripts' });
+  return pointer as TranscriptArtifactPointerWire;
+}
+
+/** Builds the trace pointer plus, when a transcript path is given, the transcript pointer. */
+async function buildArtifactPointers(params: {
+  readonly outputDir: string;
+  readonly tracePath: string;
+  readonly transcriptPath?: string;
+}): Promise<ResultArtifactPointersWire> {
+  const { outputDir, tracePath, transcriptPath } = params;
+  const trace = await buildTracePointer(outputDir, tracePath);
+  return transcriptPath
+    ? { trace, transcript: await buildTranscriptPointer(outputDir, transcriptPath) }
+    : { trace };
+}
+
 export function buildIndexArtifactEntry(
   result: EvaluationResult,
   options: {
@@ -782,6 +893,8 @@ export function buildIndexArtifactEntry(
     outputPath?: string;
     answerPath?: string;
     transcriptPath?: string;
+    artifactPointers?: ResultArtifactPointersWire;
+    rawProviderLogPath?: string;
     inputPath?: string;
     responsePath?: string;
     extraIndexFields?: AdditionalResultIndexFields;
@@ -822,6 +935,10 @@ export function buildIndexArtifactEntry(
     transcript_path: options.transcriptPath
       ? toRelativeArtifactPath(options.outputDir, options.transcriptPath)
       : undefined,
+    raw_provider_log_path: options.rawProviderLogPath
+      ? toRelativeArtifactPath(options.outputDir, options.rawProviderLogPath)
+      : undefined,
+    artifact_pointers: options.artifactPointers,
     input_path: options.inputPath
       ? toRelativeArtifactPath(options.outputDir, options.inputPath)
       : undefined,
@@ -843,12 +960,14 @@ export function buildResultIndexArtifact(
   options?: {
     projectionIdentity?: ProjectionIdentity;
     duplicatePolicy?: ExportDuplicatePolicy;
+    artifactPointers?: ResultArtifactPointersWire;
   },
 ): ResultIndexArtifact {
   const artifactSubdir = buildArtifactSubdir(result);
   const input = extractInput(result);
   const hasAnswer = result.output.length > 0;
   const hasTranscript = resultHasExecutionTraceTranscript(result);
+  const hasRawProviderLog = rawProviderLogSourcePath(result) !== undefined;
 
   return {
     timestamp: result.timestamp,
@@ -878,6 +997,10 @@ export function buildResultIndexArtifact(
     transcript_path: hasTranscript
       ? path.posix.join(artifactSubdir, 'outputs', 'transcript.jsonl')
       : undefined,
+    raw_provider_log_path: hasRawProviderLog
+      ? path.posix.join(artifactSubdir, 'outputs', 'raw', 'provider.log')
+      : undefined,
+    artifact_pointers: options?.artifactPointers,
     response_path: hasAnswer
       ? path.posix.join(artifactSubdir, 'outputs', 'response.md')
       : undefined,
@@ -1122,6 +1245,8 @@ function normalizeParsedResult(value: unknown): ParsedEvaluationResult | undefin
   }
 
   const result = value as Record<string, unknown>;
+  const parsedResult = { ...result };
+  parsedResult.rawProviderLogPath = undefined;
   const legacyOutputMessages = Array.isArray(result.output)
     ? result.output.filter(isOutputMessage)
     : undefined;
@@ -1148,7 +1273,7 @@ function normalizeParsedResult(value: unknown): ParsedEvaluationResult | undefin
       });
 
   return {
-    ...result,
+    ...parsedResult,
     timestamp: typeof result.timestamp === 'string' ? result.timestamp : new Date(0).toISOString(),
     testId: typeof result.testId === 'string' ? result.testId : 'unknown',
     score: typeof result.score === 'number' ? result.score : 0,
@@ -1263,6 +1388,11 @@ export async function writePerTestArtifacts(
       await writeFile(path.join(outputsDir, 'answer.md'), result.output, 'utf8');
       await writeFile(path.join(outputsDir, 'response.md'), result.output, 'utf8');
     }
+    const rawProviderLogSource = rawProviderLogSourcePath(result);
+    if (rawProviderLogSource) {
+      await copyRawProviderLogArtifact(rawProviderLogSource, outputsDir);
+    }
+    const tracePath = path.join(outputsDir, 'trace.json');
     const envelope = await writeTraceEnvelopeSidecar({
       result,
       outputDir,
@@ -1272,9 +1402,17 @@ export async function writePerTestArtifacts(
       runId: options?.runId,
       duplicatePolicy,
     });
-    if (hasTranscriptProjection(result, envelope)) {
-      await writeTranscriptJsonl(path.join(outputsDir, 'transcript.jsonl'), result, envelope);
+    const transcriptPath = hasTranscriptProjection(result, envelope)
+      ? path.join(outputsDir, 'transcript.jsonl')
+      : undefined;
+    if (transcriptPath) {
+      await writeTranscriptJsonl(transcriptPath, result, envelope);
     }
+    const artifactPointers = await buildArtifactPointers({
+      outputDir,
+      tracePath,
+      transcriptPath,
+    });
 
     const extraIndexFields = await collectAdditionalIndexFields(
       result,
@@ -1288,6 +1426,7 @@ export async function writePerTestArtifacts(
       ...buildResultIndexArtifact(result, extraIndexFields, {
         projectionIdentity: envelope.projectionIdentity,
         duplicatePolicy,
+        artifactPointers,
       }),
       experiment: options?.experiment,
     });
@@ -1351,6 +1490,11 @@ export async function writeArtifactsFromResults(
     const transcriptPath = hasTranscriptProjection(result, envelope)
       ? path.join(outputsDir, 'transcript.jsonl')
       : undefined;
+    const tracePath = path.join(outputsDir, 'trace.json');
+    const rawProviderLogSource = rawProviderLogSourcePath(result);
+    const rawProviderLogPath = rawProviderLogSource
+      ? rawProviderLogArtifactPath(outputsDir)
+      : undefined;
     const projectionIdentity = envelope.projectionIdentity;
     if (!projectionIdentity) {
       throw new Error(`Result ${result.testId ?? 'unknown'} is missing projection identity`);
@@ -1368,9 +1512,12 @@ export async function writeArtifactsFromResults(
       outputsDir,
       answerPath,
       responsePath,
+      tracePath,
       envelope,
       projectionIdentity,
       transcriptPath,
+      rawProviderLogSource,
+      rawProviderLogPath,
       identityId,
     };
   });
@@ -1416,14 +1563,22 @@ export async function writeArtifactsFromResults(
       await writeFile(plan.answerPath, result.output, 'utf8');
       await writeFile(plan.responsePath, result.output, 'utf8');
     }
+    if (plan.rawProviderLogSource) {
+      await copyRawProviderLogArtifact(plan.rawProviderLogSource, plan.outputsDir);
+    }
     await writeFile(
-      path.join(plan.outputsDir, 'trace.json'),
+      plan.tracePath,
       `${JSON.stringify(toTraceEnvelopeWire(envelope), null, 2)}\n`,
       'utf8',
     );
     if (plan.transcriptPath) {
       await writeTranscriptJsonl(plan.transcriptPath, result, envelope);
     }
+    const artifactPointers = await buildArtifactPointers({
+      outputDir,
+      tracePath: plan.tracePath,
+      transcriptPath: plan.transcriptPath,
+    });
 
     const extraIndexFields = await collectAdditionalIndexFields(
       result,
@@ -1442,6 +1597,8 @@ export async function writeArtifactsFromResults(
         outputPath: plan.answerPath,
         answerPath: plan.answerPath,
         transcriptPath: plan.transcriptPath,
+        artifactPointers,
+        rawProviderLogPath: plan.rawProviderLogPath,
         inputPath: plan.inputPath,
         responsePath: plan.responsePath,
         extraIndexFields,
diff --git a/packages/core/src/evaluation/trace-envelope.ts b/packages/core/src/evaluation/trace-envelope.ts
index ba3244139..c1fbb961e 100644
--- a/packages/core/src/evaluation/trace-envelope.ts
+++ b/packages/core/src/evaluation/trace-envelope.ts
@@ -775,7 +775,7 @@ function scoresFromResult(
     targetSpanId,
     evidence: dropUndefined({
       span_ids: [targetSpanId],
-      assertions: score.assertions.map((assertion) =>
+      assertions: (score.assertions ?? []).map((assertion) =>
         dropUndefined({
           text: assertion.text,
           passed: assertion.passed,
diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts
index 5d1e139ca..01b63b398 100644
--- a/packages/core/src/evaluation/types.ts
+++ b/packages/core/src/evaluation/types.ts
@@ -1184,6 +1184,12 @@ export interface EvaluationResult {
   readonly error?: string;
   /** Canonical execution trace: messages, events, metrics, and provider provenance. */
   readonly trace: Trace;
+  /**
+   * Optional local provider-native session/stream log captured by a provider.
+   * Artifact writers copy this byte-for-byte into the run bundle as raw,
+   * non-canonical evidence and expose only the run-local pointer.
+   */
+  readonly rawProviderLogPath?: string;
   /** Path to the temporary workspace directory (included on failure for debugging) */
   readonly workspacePath?: string;
   /** Input messages sent to the agent. Always Message[] for consistent shape with output. */
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 0b351ca58..c0e10451d 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -9,6 +9,7 @@ export {
   ResultRowSchemaError,
   normalizeResultRow,
 } from './evaluation/result-row-schema.js';
+export * from './evaluation/result-artifact-contract.js';
 export { parseYamlValue } from './evaluation/yaml-loader.js';
 export * from './evaluation/yaml-parser.js';
 export {
diff --git a/packages/core/test/evaluation/loaders/agent-skills-parser.test.ts b/packages/core/test/evaluation/loaders/agent-skills-parser.test.ts
index 166542fc5..40183638e 100644
--- a/packages/core/test/evaluation/loaders/agent-skills-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/agent-skills-parser.test.ts
@@ -218,6 +218,24 @@ describe('parseAgentSkillsEvals', () => {
     expect(tests[0].metadata).toBeUndefined();
   });
 
+  it('ignores transcript artifact-looking fields in evals.json cases', () => {
+    const tests = parseAgentSkillsEvals({
+      evals: [
+        {
+          id: 1,
+          prompt: 'test prompt',
+          transcript_path: 'outputs/transcript.jsonl',
+          raw_provider_log_path: 'outputs/raw/provider.log',
+        },
+      ],
+    });
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].metadata).toBeUndefined();
+    expect(tests[0]).not.toHaveProperty('transcript_path');
+    expect(tests[0]).not.toHaveProperty('raw_provider_log_path');
+  });
+
   it('includes source in error messages', () => {
     expect(() => parseAgentSkillsEvals({}, 'my-evals.json')).toThrow('my-evals.json');
   });
diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts
index 768db40b8..a7c71ad39 100644
--- a/packages/core/test/evaluation/orchestrator.test.ts
+++ b/packages/core/test/evaluation/orchestrator.test.ts
@@ -23,6 +23,7 @@ import {
   type ReplayFixtureRecord,
   serializeReplayFixtureRecord,
 } from '../../src/evaluation/replay-fixtures.js';
+import { writeArtifactsFromResults } from '../../src/evaluation/run-artifacts.js';
 import { RunBudgetTracker } from '../../src/evaluation/run-budget-tracker.js';
 import {
   buildTraceEnvelopeFromEvaluationResult,
@@ -722,6 +723,49 @@ console.log('spreadsheet: revenue,total\\nQ1,42');`,
     expect(result.failureReasonCode).toBe('provider_error');
   });
 
+  it('copies and indexes raw provider logs from normal per-case evaluation artifacts', async () => {
+    const tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-raw-provider-log-'));
+    const rawLogPath = path.join(tempDir, 'provider-native-session.jsonl');
+    writeFileSync(rawLogPath, '{"event":"provider-native"}\n', 'utf8');
+
+    const provider = new SequenceProvider('mock', {
+      responses: [
+        {
+          output: [{ role: 'assistant', content: 'Raw log evidence preserved.' }],
+          raw: { logFile: rawLogPath },
+        },
+      ],
+    });
+
+    const result = await runEvalCase({
+      evalCase: baseTestCase,
+      provider,
+      target: baseTarget,
+      evaluators: evaluatorRegistry,
+    });
+
+    expect(result.rawProviderLogPath).toBe(rawLogPath);
+
+    const outputDir = path.join(tempDir, 'artifacts');
+    await writeArtifactsFromResults([result], outputDir);
+
+    const outputsDir = path.join(outputDir, 'test-dataset', 'case-1', 'outputs');
+    expect(readFileSync(path.join(outputsDir, 'raw', 'provider.log'), 'utf8')).toBe(
+      '{"event":"provider-native"}\n',
+    );
+    expect(readdirSync(outputsDir)).toContain('transcript.jsonl');
+    expect(readdirSync(outputsDir)).not.toContain('transcript.json');
+
+    const indexRows = readFileSync(path.join(outputDir, 'index.jsonl'), 'utf8')
+      .trim()
+      .split('\n')
+      .map((line) => JSON.parse(line) as Record<string, unknown>);
+    expect(indexRows[0]?.raw_provider_log_path).toBe(
+      'test-dataset/case-1/outputs/raw/provider.log',
+    );
+    expect(indexRows[0]?.transcript_path).toBe('test-dataset/case-1/outputs/transcript.jsonl');
+  });
+
   it('reports failed progress status for batch item errors', async () => {
     class BatchProvider implements Provider {
       readonly id = 'batch:mock';
diff --git a/packages/core/test/evaluation/results-repo.test.ts b/packages/core/test/evaluation/results-repo.test.ts
index c006c4e4c..c66852b8f 100644
--- a/packages/core/test/evaluation/results-repo.test.ts
+++ b/packages/core/test/evaluation/results-repo.test.ts
@@ -6,6 +6,7 @@ import path from 'node:path';
 import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
 
 import type { ResultsConfig } from '../../src/evaluation/loaders/config-loader.js';
+import { AGENTV_RESULTS_REFS } from '../../src/evaluation/result-artifact-contract.js';
 import {
   DEFAULT_RESULTS_BRANCH,
   buildWipBranchName,
@@ -51,6 +52,17 @@ function createResultsConfig(repoDir: string, cloneDir: string): ResultsConfig {
   };
 }
 
+/**
+ * Reports whether any ref is a path-prefix of a different ref
+ * (for example `a/b` and `a/b/c`).
+ */
+function refsHavePrefixConflict(refs: readonly string[]): boolean {
+  return refs.some((ref) => {
+    const prefix = `${ref}/`;
+    return refs.some((other) => other !== ref && other.startsWith(prefix));
+  });
+}
+
 function initializeRemoteRepo(rootDir: string): { remoteDir: string; seedDir: string } {
   const remoteDir = path.join(rootDir, 'results-remote.git');
   git(`git init --bare --initial-branch=main --quiet "${remoteDir}"`, rootDir);
@@ -332,6 +344,13 @@ describe('results repo write path', () => {
     );
 
     expect(DEFAULT_RESULTS_BRANCH).toBe('agentv/results/v1');
+    expect(DEFAULT_RESULTS_BRANCH).toBe(AGENTV_RESULTS_REFS.primary);
+    expect(AGENTV_RESULTS_REFS).toEqual({
+      primary: 'agentv/results/v1',
+      artifacts: 'agentv/artifacts/v1',
+      oplog: 'agentv/oplog/v1',
+    });
+    expect(refsHavePrefixConflict(Object.values(AGENTV_RESULTS_REFS))).toBe(false);
     expect(normalized.branch).toBe('agentv/results/v1');
     expect(normalized.repo_path).toBe('/tmp/source-project');
     expect(normalized.auto_push).toBe(false);