Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ecr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
build-args: |
SEI_CHAIN_REF=${{ inputs.ref || github.sha }}
GO_BUILD_TAGS=mock_block_validation
- name: Build and push seid with mock balances + mock chain validation
- name: Build and push seid with mock chain validation
uses: docker/build-push-action@v6
with:
context: '.'
Expand All @@ -75,7 +75,7 @@ jobs:
tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:mock_chain_validation-${{ inputs.tag || inputs.ref || github.sha }}
build-args: |
SEI_CHAIN_REF=${{ inputs.ref || github.sha }}
GO_BUILD_TAGS=mock_balances mock_chain_validation
GO_BUILD_TAGS=mock_chain_validation
Comment thread
claude[bot] marked this conversation as resolved.
Comment thread
bdchatham marked this conversation as resolved.
- name: Build and push seid
uses: docker/build-push-action@v6
with:
Expand Down
27 changes: 23 additions & 4 deletions .github/workflows/nightly-ecr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
echo "nightly=nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT"
echo "mock=mock-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT"
echo "mock_chain=mock_chain_validation-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT"
echo "mock_chain_balances=mock_chain_validation-mock_balances-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT"
- name: AWS Login
uses: aws-actions/configure-aws-credentials@v4
Expand Down Expand Up @@ -78,9 +79,10 @@ jobs:
SEI_CHAIN_REF=${{ steps.tag.outputs.sha }}
GO_BUILD_TAGS=mock_balances
# mock_chain_validation -- consumed by chaos-lab forked-state replays;
# swallows every swallow-eligible halting validation failure.
- name: Build and push mock_chain_validation-nightly (mock_balances + mock_chain_validation)
# mock_chain_validation -- faithful real-history replay (e.g. the memIAVL->flatKV
# migration shadow). No mock_balances, so real transactions execute against real
# balances. Matches the on-demand ecr.yml mock_chain_validation-<ref> image.
- name: Build and push mock_chain_validation-nightly (mock_chain_validation)
uses: docker/build-push-action@v6
with:
context: '.'
Expand All @@ -89,6 +91,22 @@ jobs:
tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:${{ steps.tag.outputs.mock_chain }}
cache-from: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain
cache-to: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain,mode=max
build-args: |
SEI_CHAIN_REF=${{ steps.tag.outputs.sha }}
GO_BUILD_TAGS=mock_chain_validation
# mock_balances + mock_chain_validation -- benchmark/chaos replays that run on a
# fresh chain, submit synthetic txs, and need pre-funded accounts (consumed by
# chaos-lab forked-state replays).
- name: Build and push mock_chain_validation-mock_balances-nightly (mock_balances + mock_chain_validation)
uses: docker/build-push-action@v6
with:
context: '.'
platforms: linux/amd64
push: true
tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:${{ steps.tag.outputs.mock_chain_balances }}
cache-from: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain-balances
cache-to: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain-balances,mode=max
build-args: |
SEI_CHAIN_REF=${{ steps.tag.outputs.sha }}
GO_BUILD_TAGS=mock_balances mock_chain_validation
Expand All @@ -101,4 +119,5 @@ jobs:
echo "|-----|---------|" >> "$GITHUB_STEP_SUMMARY"
echo "| \`${{ steps.tag.outputs.nightly }}\` | regular |" >> "$GITHUB_STEP_SUMMARY"
echo "| \`${{ steps.tag.outputs.mock }}\` | mock_balances |" >> "$GITHUB_STEP_SUMMARY"
echo "| \`${{ steps.tag.outputs.mock_chain }}\` | mock_balances + mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY"
echo "| \`${{ steps.tag.outputs.mock_chain }}\` | mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY"
echo "| \`${{ steps.tag.outputs.mock_chain_balances }}\` | mock_balances + mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY"
5 changes: 4 additions & 1 deletion sei-cosmos/x/upgrade/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types"
"github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/keeper"
"github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/types"
tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types"
"github.com/sei-protocol/seilog"
"go.opentelemetry.io/otel/attribute"
otelmetric "go.opentelemetry.io/otel/metric"
Expand Down Expand Up @@ -107,7 +108,9 @@ func BeginBlocker(k keeper.Keeper, ctx sdk.Context) {
if k.HasHandler(plan.Name) {
downgradeMsg := fmt.Sprintf("BINARY UPDATED BEFORE TRIGGER! UPGRADE \"%s\" - in binary but not executed on chain", plan.Name)
logger.Error(downgradeMsg)
panic(downgradeMsg)
if err := tmtypes.DefaultConsensusPolicy().HandleError(fmt.Errorf("%s: %w", downgradeMsg, tmtypes.ErrUpgradeBeforeTrigger)); err != nil {
panic(downgradeMsg)
}
}
}

Expand Down
53 changes: 53 additions & 0 deletions sei-cosmos/x/upgrade/abci_halt_default_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//go:build !mock_chain_validation && !mock_block_validation

// In the default build only, BeginBlocker panics when the binary already carries
// a handler for an upgrade whose height the chain has not yet reached. A mock
// validation build swallows ErrUpgradeBeforeTrigger so a replay can run past that
// point, which is why this halt test is restricted to the default build.
package upgrade_test

import (
"testing"
"time"

"github.com/stretchr/testify/require"

sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types"
"github.com/sei-protocol/sei-chain/sei-cosmos/types/module"
"github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade"
"github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/types"
)

// TestHaltIfTooNew drives upgrade.BeginBlocker through three stages for an
// upgrade named "future" whose handler is registered in the binary:
//
//  1. no plan is scheduled: BeginBlocker must not panic and the handler must not run;
//  2. a plan is scheduled for a later height: BeginBlocker must panic and the
//     handler must still not run;
//  3. the chain is at the plan height: BeginBlocker must not panic and the
//     handler must run exactly once, after which the plan is verified as cleared.
func TestHaltIfTooNew(t *testing.T) {
	ts := setupTest(t, 10)

	// handlerRuns counts how many times the "future" upgrade handler executed.
	handlerRuns := 0
	countingHandler := func(_ sdk.Context, _ types.Plan, vm module.VersionMap) (module.VersionMap, error) {
		handlerRuns++
		return vm, nil
	}

	// beginBlockAt wraps a BeginBlocker call for the given context in a
	// zero-argument func so it can be handed to the panic assertions.
	beginBlockAt := func(ctx sdk.Context) func() {
		return func() {
			upgrade.BeginBlocker(ts.keeper, ctx)
		}
	}

	t.Log("Verify that we don't panic with registered plan not in database at all")
	ts.keeper.SetUpgradeHandler("future", countingHandler)

	earlyCtx := ts.ctx.WithBlockHeight(ts.ctx.BlockHeight() + 1).WithBlockTime(time.Now())
	require.NotPanics(t, beginBlockAt(earlyCtx))
	require.Equal(t, 0, handlerRuns)

	t.Log("Verify we panic if we have a registered handler ahead of time")
	proposal := &types.SoftwareUpgradeProposal{
		Title: "prop",
		Plan:  types.Plan{Name: "future", Height: ts.ctx.BlockHeight() + 3},
	}
	require.NoError(t, ts.handler(ts.ctx, proposal))
	require.Panics(t, beginBlockAt(earlyCtx))
	require.Equal(t, 0, handlerRuns)

	t.Log("Verify we no longer panic if the plan is on time")

	// Same height the plan was scheduled for above.
	onTimeCtx := ts.ctx.WithBlockHeight(ts.ctx.BlockHeight() + 3).WithBlockTime(time.Now())
	require.NotPanics(t, beginBlockAt(onTimeCtx))
	require.Equal(t, 1, handlerRuns)

	VerifyCleared(t, onTimeCtx)
}
34 changes: 0 additions & 34 deletions sei-cosmos/x/upgrade/abci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,40 +121,6 @@ func VerifyDoUpgradeWithCtx(t *testing.T, newCtx sdk.Context, proposalName strin
VerifyCleared(t, newCtx)
}

// TestHaltIfTooNew checks how upgrade.BeginBlocker reacts when the binary has a
// handler registered for the "future" upgrade: no panic while no plan is
// scheduled, a panic once a plan is scheduled for a later height, and a normal
// handler run (exactly once) when the block height equals the plan height.
func TestHaltIfTooNew(t *testing.T) {
s := setupTest(t, 10)
t.Log("Verify that we don't panic with registered plan not in database at all")
// called counts executions of the "future" upgrade handler.
var called int
s.keeper.SetUpgradeHandler("future", func(_ sdk.Context, _ types.Plan, vm module.VersionMap) (module.VersionMap, error) {
called++
return vm, nil
})

// One block past the suite's starting height; no plan has been scheduled yet.
newCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 1).WithBlockTime(time.Now())
require.NotPanics(t, func() {
upgrade.BeginBlocker(s.keeper, newCtx)
})
require.Equal(t, 0, called)

t.Log("Verify we panic if we have a registered handler ahead of time")
// Schedule the "future" plan three blocks ahead, i.e. later than newCtx.
err := s.handler(s.ctx, &types.SoftwareUpgradeProposal{Title: "prop", Plan: types.Plan{Name: "future", Height: s.ctx.BlockHeight() + 3}})
require.NoError(t, err)
require.Panics(t, func() {
upgrade.BeginBlocker(s.keeper, newCtx)
})
// The panic must happen without the handler having been invoked.
require.Equal(t, 0, called)

t.Log("Verify we no longer panic if the plan is on time")

// Same height the plan was scheduled for, so the upgrade is due.
futCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 3).WithBlockTime(time.Now())
require.NotPanics(t, func() {
upgrade.BeginBlocker(s.keeper, futCtx)
})
require.Equal(t, 1, called)

VerifyCleared(t, futCtx)
}

func VerifyCleared(t *testing.T, newCtx sdk.Context) {
t.Log("Verify that the upgrade plan has been cleared")
bz, err := s.querier(newCtx, []string{types.QueryCurrent}, abci.RequestQuery{})
Expand Down
23 changes: 23 additions & 0 deletions sei-db/state_db/sc/memiavl/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"math"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"

"github.com/alitto/pond"
Expand Down Expand Up @@ -811,6 +813,22 @@ func (db *DB) RewriteSnapshot(ctx context.Context) error {

// Rename temporary directory to final location
if err := os.Rename(path, targetPath); err != nil {
// An existing snapshot-<h> directory (from a prior atomic rename) can be
// used; drop our redundant temp rather than failing this rewrite. Only a
// directory is a valid prior snapshot -- a non-directory at the path is
// corruption/external interference and must not be adopted.
if errors.Is(err, fs.ErrExist) || errors.Is(err, syscall.ENOTEMPTY) {
if info, statErr := os.Stat(targetPath); statErr != nil || !info.IsDir() {
return fmt.Errorf("snapshot path %q exists but is not a usable directory: %w", targetPath, err)
}
logger.Info("reusing existing snapshot directory, dropping redundant temp",
"snapshotDir", snapshotDir,
)
if rmErr := os.RemoveAll(path); rmErr != nil {
return rmErr
}
return updateCurrentSymlink(db.dir, snapshotDir)
Comment thread
bdchatham marked this conversation as resolved.
Comment thread
bdchatham marked this conversation as resolved.
}
logger.Error("failed to rename snapshot directory, cleaning up",
"tmpDir", tmpDir,
"targetDir", snapshotDir,
Expand Down Expand Up @@ -1231,6 +1249,11 @@ func initEmptyDB(dir string, initialVersion uint32) error {
// it could fail under concurrent usage for tmp file conflicts.
func updateCurrentSymlink(dir, snapshot string) error {
tmpPath := currentTmpPath(dir)
// A crash between Symlink and Rename can leave current-tmp behind; remove it
// so a re-offered restore is idempotent rather than failing with EEXIST.
if err := os.Remove(tmpPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
return err
}
if err := os.Symlink(snapshot, tmpPath); err != nil {
return err
}
Expand Down
13 changes: 13 additions & 0 deletions sei-db/state_db/sc/memiavl/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1101,3 +1101,16 @@ func TestCloseWithSuccessfulBackgroundSnapshot(t *testing.T) {
err = db.Close()
require.NoError(t, err)
}

// A crash between Symlink and Rename can leave current-tmp behind; a re-offered
// restore must still repoint current rather than failing with EEXIST.
func TestUpdateCurrentSymlinkClearsStaleTmp(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.Symlink("snapshot-0", currentTmpPath(dir)))

require.NoError(t, updateCurrentSymlink(dir, "snapshot-1"))

target, err := os.Readlink(currentPath(dir))
require.NoError(t, err)
require.Equal(t, "snapshot-1", target)
}
56 changes: 47 additions & 9 deletions sei-db/state_db/sc/memiavl/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ package memiavl

import (
"context"
"errors"
"fmt"
"io/fs"
"math"
"os"
"path/filepath"
"syscall"
"time"

"github.com/sei-protocol/sei-chain/sei-db/proto"
Expand Down Expand Up @@ -48,13 +51,20 @@ func NewMultiTreeImporter(dir string, height uint64) (*MultiTreeImporter, error)
return nil, fmt.Errorf("fail to lock db: %w", err)
}

return &MultiTreeImporter{
mti := &MultiTreeImporter{
dir: dir,
height: int64(height),
snapshotDir: snapshotName(int64(height)),
fileLock: fileLock,
ctx: context.Background(), // Default to background context for backward compatibility
}, nil
}
// State-sync can re-offer the same snapshot, so a prior pass may have left a
// temp dir at this height; clear it so it can't poison this import.
if err := os.RemoveAll(mti.tmpDir()); err != nil {
_ = fileLock.Unlock()
return nil, fmt.Errorf("fail to clear stale import tmp dir: %w", err)
}
return mti, nil
}

func (mti *MultiTreeImporter) tmpDir() string {
Expand Down Expand Up @@ -87,7 +97,15 @@ func (mti *MultiTreeImporter) AddNode(node *types.SnapshotNode) {
mti.importer.Add(node)
}

func (mti *MultiTreeImporter) Close() error {
func (mti *MultiTreeImporter) Close() (err error) {
// Release the import flock on every return path; a leaked lock fails a
// same-process restore re-offer with ErrLocked.
defer func() {
if unlockErr := mti.fileLock.Unlock(); unlockErr != nil && err == nil {
err = unlockErr
}
}()

if mti.importer != nil {
if err := mti.importer.Close(); err != nil {
return err
Expand All @@ -100,14 +118,34 @@ func (mti *MultiTreeImporter) Close() error {
return err
}

if err := os.Rename(tmpDir, filepath.Join(mti.dir, mti.snapshotDir)); err != nil {
return err
// A re-offered restore may have already produced snapshot-<h>; adopt it and
// drop our temp instead of failing. The ErrExist/ENOTEMPTY arm covers
// rename-into-existing-dir across kernels (EEXIST darwin, ENOTEMPTY linux).
finalDir := filepath.Join(mti.dir, mti.snapshotDir)
if info, statErr := os.Stat(finalDir); statErr == nil {
// Only a directory is a valid prior snapshot; a non-directory at this path
// is corruption/external interference and must not be adopted.
if !info.IsDir() {
return fmt.Errorf("snapshot path %q exists but is not a directory", finalDir)
}
if rmErr := os.RemoveAll(tmpDir); rmErr != nil {
return rmErr
}
} else if err := os.Rename(tmpDir, finalDir); err != nil {
if !errors.Is(err, fs.ErrExist) && !errors.Is(err, syscall.ENOTEMPTY) {
return err
}
// finalDir appeared between the stat and the rename; only a directory is a
// valid prior snapshot, so don't adopt a non-directory.
if info, statErr := os.Stat(finalDir); statErr != nil || !info.IsDir() {
return fmt.Errorf("snapshot path %q exists but is not a directory: %w", finalDir, err)
}
if rmErr := os.RemoveAll(tmpDir); rmErr != nil {
return rmErr
}
Comment thread
bdchatham marked this conversation as resolved.
}

if err := updateCurrentSymlink(mti.dir, mti.snapshotDir); err != nil {
return err
}
return mti.fileLock.Unlock()
return updateCurrentSymlink(mti.dir, mti.snapshotDir)
Comment thread
bdchatham marked this conversation as resolved.
}

// TreeImporter import a single memiavl tree from state-sync snapshot
Expand Down
3 changes: 3 additions & 0 deletions sei-tendermint/internal/blocksync/reactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,9 @@ func (s *syncController) poolRoutine(ctx context.Context, pool *BlockPool, initi
firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstParts.Header()}

err = state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit)
if err != nil {
err = types.DefaultConsensusPolicy().HandleError(fmt.Errorf("%w: %w", types.ErrLastCommitVerify, err))
}
if err == nil {
err = s.blockExec.ValidateBlock(ctx, state, first)
}
Expand Down
Loading
Loading