diff --git a/.github/workflows/ecr.yml b/.github/workflows/ecr.yml index a437ed0d38..eb41832100 100644 --- a/.github/workflows/ecr.yml +++ b/.github/workflows/ecr.yml @@ -64,7 +64,7 @@ jobs: build-args: | SEI_CHAIN_REF=${{ inputs.ref || github.sha }} GO_BUILD_TAGS=mock_block_validation - - name: Build and push seid with mock balances + mock chain validation + - name: Build and push seid with mock chain validation uses: docker/build-push-action@v6 with: context: '.' @@ -75,7 +75,7 @@ jobs: tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:mock_chain_validation-${{ inputs.tag || inputs.ref || github.sha }} build-args: | SEI_CHAIN_REF=${{ inputs.ref || github.sha }} - GO_BUILD_TAGS=mock_balances mock_chain_validation + GO_BUILD_TAGS=mock_chain_validation - name: Build and push seid uses: docker/build-push-action@v6 with: diff --git a/.github/workflows/nightly-ecr.yml b/.github/workflows/nightly-ecr.yml index e65266e5f1..b3cd66329f 100644 --- a/.github/workflows/nightly-ecr.yml +++ b/.github/workflows/nightly-ecr.yml @@ -35,6 +35,7 @@ jobs: echo "nightly=nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT" echo "mock=mock-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT" echo "mock_chain=mock_chain_validation-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT" + echo "mock_chain_balances=mock_chain_validation-mock_balances-nightly-${DATE}-${SHA7}" >> "$GITHUB_OUTPUT" - name: AWS Login uses: aws-actions/configure-aws-credentials@v4 @@ -78,9 +79,10 @@ jobs: SEI_CHAIN_REF=${{ steps.tag.outputs.sha }} GO_BUILD_TAGS=mock_balances - # mock_chain_validation -- consumed by chaos-lab forked-state replays; - # swallows every swallow-eligible halting validation failure. - - name: Build and push mock_chain_validation-nightly (mock_balances + mock_chain_validation) + # mock_chain_validation -- faithful real-history replay (e.g. the memIAVL->flatKV + # migration shadow). No mock_balances, so real transactions execute against real + # balances. Matches the on-demand ecr.yml mock_chain_validation- image. + - name: Build and push mock_chain_validation-nightly (mock_chain_validation) uses: docker/build-push-action@v6 with: context: '.' @@ -89,6 +91,22 @@ jobs: tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:${{ steps.tag.outputs.mock_chain }} cache-from: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain cache-to: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain,mode=max + build-args: | + SEI_CHAIN_REF=${{ steps.tag.outputs.sha }} + GO_BUILD_TAGS=mock_chain_validation + + # mock_balances + mock_chain_validation -- benchmark/chaos replays that run on a + # fresh chain, submit synthetic txs, and need pre-funded accounts (consumed by + # chaos-lab forked-state replays). + - name: Build and push mock_chain_validation-mock_balances-nightly (mock_balances + mock_chain_validation) + uses: docker/build-push-action@v6 + with: + context: '.' + platforms: linux/amd64 + push: true + tags: ${{ steps.login-ecr.outputs.registry }}/sei/sei-chain:${{ steps.tag.outputs.mock_chain_balances }} + cache-from: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain-balances + cache-to: type=registry,ref=${{ steps.login-ecr.outputs.registry }}/sei/build-cache:nightly-mock-chain-balances,mode=max build-args: | SEI_CHAIN_REF=${{ steps.tag.outputs.sha }} GO_BUILD_TAGS=mock_balances mock_chain_validation @@ -101,4 +119,5 @@ jobs: echo "|-----|---------|" >> "$GITHUB_STEP_SUMMARY" echo "| \`${{ steps.tag.outputs.nightly }}\` | regular |" >> "$GITHUB_STEP_SUMMARY" echo "| \`${{ steps.tag.outputs.mock }}\` | mock_balances |" >> "$GITHUB_STEP_SUMMARY" - echo "| \`${{ steps.tag.outputs.mock_chain }}\` | mock_balances + mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY" + echo "| \`${{ steps.tag.outputs.mock_chain }}\` | mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY" + echo "| \`${{ steps.tag.outputs.mock_chain_balances }}\` | mock_balances + mock_chain_validation |" >> "$GITHUB_STEP_SUMMARY" diff --git a/sei-cosmos/x/upgrade/abci.go b/sei-cosmos/x/upgrade/abci.go index b36699be40..aad1a18a5e 100644 --- a/sei-cosmos/x/upgrade/abci.go +++ b/sei-cosmos/x/upgrade/abci.go @@ -10,6 +10,7 @@ import ( sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/keeper" "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/types" + tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" "github.com/sei-protocol/seilog" "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" @@ -107,7 +108,9 @@ func BeginBlocker(k keeper.Keeper, ctx sdk.Context) { if k.HasHandler(plan.Name) { downgradeMsg := fmt.Sprintf("BINARY UPDATED BEFORE TRIGGER! UPGRADE \"%s\" - in binary but not executed on chain", plan.Name) logger.Error(downgradeMsg) - panic(downgradeMsg) + if err := tmtypes.DefaultConsensusPolicy().HandleError(fmt.Errorf("%s: %w", downgradeMsg, tmtypes.ErrUpgradeBeforeTrigger)); err != nil { + panic(downgradeMsg) + } } } diff --git a/sei-cosmos/x/upgrade/abci_halt_default_test.go b/sei-cosmos/x/upgrade/abci_halt_default_test.go new file mode 100644 index 0000000000..062ffee24a --- /dev/null +++ b/sei-cosmos/x/upgrade/abci_halt_default_test.go @@ -0,0 +1,53 @@ +//go:build !mock_chain_validation && !mock_block_validation + +// BeginBlocker panics when the binary carries a handler for an upgrade height +// the chain has not reached only in the default build; a mock validation build +// swallows ErrUpgradeBeforeTrigger to let a replay run past it, so this halt is +// default-build only. +package upgrade_test + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" + "github.com/sei-protocol/sei-chain/sei-cosmos/types/module" + "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade" + "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/types" +) + +func TestHaltIfTooNew(t *testing.T) { + s := setupTest(t, 10) + t.Log("Verify that we don't panic with registered plan not in database at all") + var called int + s.keeper.SetUpgradeHandler("future", func(_ sdk.Context, _ types.Plan, vm module.VersionMap) (module.VersionMap, error) { + called++ + return vm, nil + }) + + newCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 1).WithBlockTime(time.Now()) + require.NotPanics(t, func() { + upgrade.BeginBlocker(s.keeper, newCtx) + }) + require.Equal(t, 0, called) + + t.Log("Verify we panic if we have a registered handler ahead of time") + err := s.handler(s.ctx, &types.SoftwareUpgradeProposal{Title: "prop", Plan: types.Plan{Name: "future", Height: s.ctx.BlockHeight() + 3}}) + require.NoError(t, err) + require.Panics(t, func() { + upgrade.BeginBlocker(s.keeper, newCtx) + }) + require.Equal(t, 0, called) + + t.Log("Verify we no longer panic if the plan is on time") + + futCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 3).WithBlockTime(time.Now()) + require.NotPanics(t, func() { + upgrade.BeginBlocker(s.keeper, futCtx) + }) + require.Equal(t, 1, called) + + VerifyCleared(t, futCtx) +} diff --git a/sei-cosmos/x/upgrade/abci_test.go b/sei-cosmos/x/upgrade/abci_test.go index 6d1b96ad0d..40f2c581c6 100644 --- a/sei-cosmos/x/upgrade/abci_test.go +++ b/sei-cosmos/x/upgrade/abci_test.go @@ -121,40 +121,6 @@ func VerifyDoUpgradeWithCtx(t *testing.T, newCtx sdk.Context, proposalName strin VerifyCleared(t, newCtx) } -func TestHaltIfTooNew(t *testing.T) { - s := setupTest(t, 10) - t.Log("Verify that we don't panic with registered plan not in database at all") - var called int - s.keeper.SetUpgradeHandler("future", func(_ sdk.Context, _ types.Plan, vm module.VersionMap) (module.VersionMap, error) { - called++ - return vm, nil - }) - - newCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 1).WithBlockTime(time.Now()) - require.NotPanics(t, func() { - upgrade.BeginBlocker(s.keeper, newCtx) - }) - require.Equal(t, 0, called) - - t.Log("Verify we panic if we have a registered handler ahead of time") - err := s.handler(s.ctx, &types.SoftwareUpgradeProposal{Title: "prop", Plan: types.Plan{Name: "future", Height: s.ctx.BlockHeight() + 3}}) - require.NoError(t, err) - require.Panics(t, func() { - upgrade.BeginBlocker(s.keeper, newCtx) - }) - require.Equal(t, 0, called) - - t.Log("Verify we no longer panic if the plan is on time") - - futCtx := s.ctx.WithBlockHeight(s.ctx.BlockHeight() + 3).WithBlockTime(time.Now()) - require.NotPanics(t, func() { - upgrade.BeginBlocker(s.keeper, futCtx) - }) - require.Equal(t, 1, called) - - VerifyCleared(t, futCtx) -} - func VerifyCleared(t *testing.T, newCtx sdk.Context) { t.Log("Verify that the upgrade plan has been cleared") bz, err := s.querier(newCtx, []string{types.QueryCurrent}, abci.RequestQuery{}) diff --git a/sei-db/state_db/sc/memiavl/db.go b/sei-db/state_db/sc/memiavl/db.go index 6e9339c799..ddd18f675c 100644 --- a/sei-db/state_db/sc/memiavl/db.go +++ b/sei-db/state_db/sc/memiavl/db.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io/fs" "math" "os" "path/filepath" @@ -11,6 +12,7 @@ import ( "strconv" "strings" "sync" + "syscall" "time" "github.com/alitto/pond" @@ -811,6 +813,22 @@ func (db *DB) RewriteSnapshot(ctx context.Context) error { // Rename temporary directory to final location if err := os.Rename(path, targetPath); err != nil { + // An existing snapshot- directory (from a prior atomic rename) can be + // used; drop our redundant temp rather than failing this rewrite. Only a + // directory is a valid prior snapshot -- a non-directory at the path is + // corruption/external interference and must not be adopted. + if errors.Is(err, fs.ErrExist) || errors.Is(err, syscall.ENOTEMPTY) { + if info, statErr := os.Stat(targetPath); statErr != nil || !info.IsDir() { + return fmt.Errorf("snapshot path %q exists but is not a usable directory: %w", targetPath, err) + } + logger.Info("reusing existing snapshot directory, dropping redundant temp", + "snapshotDir", snapshotDir, + ) + if rmErr := os.RemoveAll(path); rmErr != nil { + return rmErr + } + return updateCurrentSymlink(db.dir, snapshotDir) + } logger.Error("failed to rename snapshot directory, cleaning up", "tmpDir", tmpDir, "targetDir", snapshotDir, @@ -1231,6 +1249,11 @@ func initEmptyDB(dir string, initialVersion uint32) error { // it could fail under concurrent usage for tmp file conflicts. func updateCurrentSymlink(dir, snapshot string) error { tmpPath := currentTmpPath(dir) + // A crash between Symlink and Rename can leave current-tmp behind; remove it + // so a re-offered restore is idempotent rather than failing with EEXIST. + if err := os.Remove(tmpPath); err != nil && !errors.Is(err, fs.ErrNotExist) { + return err + } if err := os.Symlink(snapshot, tmpPath); err != nil { return err } diff --git a/sei-db/state_db/sc/memiavl/db_test.go b/sei-db/state_db/sc/memiavl/db_test.go index 0aed93de51..b99f63f7f7 100644 --- a/sei-db/state_db/sc/memiavl/db_test.go +++ b/sei-db/state_db/sc/memiavl/db_test.go @@ -1101,3 +1101,16 @@ func TestCloseWithSuccessfulBackgroundSnapshot(t *testing.T) { err = db.Close() require.NoError(t, err) } + +// A crash between Symlink and Rename can leave current-tmp behind; a re-offered +// restore must still repoint current rather than failing with EEXIST. +func TestUpdateCurrentSymlinkClearsStaleTmp(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.Symlink("snapshot-0", currentTmpPath(dir))) + + require.NoError(t, updateCurrentSymlink(dir, "snapshot-1")) + + target, err := os.Readlink(currentPath(dir)) + require.NoError(t, err) + require.Equal(t, "snapshot-1", target) +} diff --git a/sei-db/state_db/sc/memiavl/import.go b/sei-db/state_db/sc/memiavl/import.go index e0c1910c7a..a758053709 100644 --- a/sei-db/state_db/sc/memiavl/import.go +++ b/sei-db/state_db/sc/memiavl/import.go @@ -2,10 +2,13 @@ package memiavl import ( "context" + "errors" "fmt" + "io/fs" "math" "os" "path/filepath" + "syscall" "time" "github.com/sei-protocol/sei-chain/sei-db/proto" @@ -48,13 +51,20 @@ func NewMultiTreeImporter(dir string, height uint64) (*MultiTreeImporter, error) return nil, fmt.Errorf("fail to lock db: %w", err) } - return &MultiTreeImporter{ + mti := &MultiTreeImporter{ dir: dir, height: int64(height), snapshotDir: snapshotName(int64(height)), fileLock: fileLock, ctx: context.Background(), // Default to background context for backward compatibility - }, nil + } + // State-sync can re-offer the same snapshot, so a prior pass may have left a + // temp dir at this height; clear it so it can't poison this import. + if err := os.RemoveAll(mti.tmpDir()); err != nil { + _ = fileLock.Unlock() + return nil, fmt.Errorf("fail to clear stale import tmp dir: %w", err) + } + return mti, nil } func (mti *MultiTreeImporter) tmpDir() string { @@ -87,7 +97,15 @@ func (mti *MultiTreeImporter) AddNode(node *types.SnapshotNode) { mti.importer.Add(node) } -func (mti *MultiTreeImporter) Close() error { +func (mti *MultiTreeImporter) Close() (err error) { + // Release the import flock on every return path; a leaked lock fails a + // same-process restore re-offer with ErrLocked. + defer func() { + if unlockErr := mti.fileLock.Unlock(); unlockErr != nil && err == nil { + err = unlockErr + } + }() + if mti.importer != nil { if err := mti.importer.Close(); err != nil { return err @@ -100,14 +118,34 @@ func (mti *MultiTreeImporter) Close() error { return err } - if err := os.Rename(tmpDir, filepath.Join(mti.dir, mti.snapshotDir)); err != nil { - return err + // A re-offered restore may have already produced snapshot-; adopt it and + // drop our temp instead of failing. The ErrExist/ENOTEMPTY arm covers + // rename-into-existing-dir across kernels (EEXIST darwin, ENOTEMPTY linux). + finalDir := filepath.Join(mti.dir, mti.snapshotDir) + if info, statErr := os.Stat(finalDir); statErr == nil { + // Only a directory is a valid prior snapshot; a non-directory at this path + // is corruption/external interference and must not be adopted. + if !info.IsDir() { + return fmt.Errorf("snapshot path %q exists but is not a directory", finalDir) + } + if rmErr := os.RemoveAll(tmpDir); rmErr != nil { + return rmErr + } + } else if err := os.Rename(tmpDir, finalDir); err != nil { + if !errors.Is(err, fs.ErrExist) && !errors.Is(err, syscall.ENOTEMPTY) { + return err + } + // finalDir appeared between the stat and the rename; only a directory is a + // valid prior snapshot, so don't adopt a non-directory. + if info, statErr := os.Stat(finalDir); statErr != nil || !info.IsDir() { + return fmt.Errorf("snapshot path %q exists but is not a directory: %w", finalDir, err) + } + if rmErr := os.RemoveAll(tmpDir); rmErr != nil { + return rmErr + } } - if err := updateCurrentSymlink(mti.dir, mti.snapshotDir); err != nil { - return err - } - return mti.fileLock.Unlock() + return updateCurrentSymlink(mti.dir, mti.snapshotDir) } // TreeImporter import a single memiavl tree from state-sync snapshot diff --git a/sei-tendermint/internal/blocksync/reactor.go b/sei-tendermint/internal/blocksync/reactor.go index 24db695a30..a80332853b 100644 --- a/sei-tendermint/internal/blocksync/reactor.go +++ b/sei-tendermint/internal/blocksync/reactor.go @@ -546,6 +546,9 @@ func (s *syncController) poolRoutine(ctx context.Context, pool *BlockPool, initi firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstParts.Header()} err = state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit) + if err != nil { + err = types.DefaultConsensusPolicy().HandleError(fmt.Errorf("%w: %w", types.ErrLastCommitVerify, err)) + } if err == nil { err = s.blockExec.ValidateBlock(ctx, state, first) } diff --git a/sei-tendermint/internal/blocksync/reactor_test.go b/sei-tendermint/internal/blocksync/reactor_test.go index 075c06f46a..cf0eca7ae0 100644 --- a/sei-tendermint/internal/blocksync/reactor_test.go +++ b/sei-tendermint/internal/blocksync/reactor_test.go @@ -2,7 +2,6 @@ package blocksync import ( "context" - "errors" "runtime" "strings" "testing" @@ -460,164 +459,6 @@ func TestAutoRestartIfBehind(t *testing.T) { } } -func makeValidationFailurePair( - ctx context.Context, - t *testing.T, - testRootName string, -) (sm.State, *types.Block, *types.Block) { - t.Helper() - - cfg, err := config.ResetTestRoot(t.TempDir(), testRootName) - require.NoError(t, err) - - valSet, privVals := factory.ValidatorSet(ctx, 1, 30) - genDoc := factory.GenesisDoc(cfg, time.Now(), valSet.Validators, factory.ConsensusParams()) - initialState, err := sm.MakeGenesisState(genDoc) - require.NoError(t, err) - - lastCommit := &types.Commit{} - block1, _, _, seenCommit1 := makeNextBlock(ctx, t, initialState, privVals[0], 1, lastCommit) - block2, _, _, _ := makeNextBlock(ctx, t, initialState, privVals[0], 2, seenCommit1) - - badBlock2Proto, err := block2.ToProto() - require.NoError(t, err) - badBlock2Proto.LastCommit.Signatures[0].Signature[0] ^= 0xFF - badCommit, err := types.CommitFromProto(badBlock2Proto.LastCommit) - require.NoError(t, err) - badBlock2Proto.Header.LastCommitHash = badCommit.Hash() - badBlock2, err := types.BlockFromProto(badBlock2Proto) - require.NoError(t, err) - - return initialState, block1, badBlock2 -} - -func TestPoolRoutine_DoesNotReturnOnValidationFailure(t *testing.T) { - ctx := t.Context() - - initialState, block1, badBlock2 := makeValidationFailurePair(ctx, t, "block_sync_validation_failure_does_not_return") - - badPeer := types.NodeID(strings.Repeat("a", 40)) - goodPeer := types.NodeID(strings.Repeat("b", 40)) - router := makeRouter(testPeers{ - badPeer: {id: badPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, - goodPeer: {id: goodPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, - }) - pool := NewBlockPool(1, router) - done := make(chan error, 1) - go func() { done <- pool.run(ctx) }() - t.Cleanup(func() { - if err := <-done; err != nil && !errors.Is(err, context.Canceled) { - t.Fatalf("pool.run(): %v", err) - } - }) - pool.SetPeerRange(badPeer, 1, 2) - - evictNetwork := p2p.MakeTestNetwork(t, p2p.TestNetworkOptions{NumNodes: 1}) - syncer := &syncController{ - router: evictNetwork.Node(evictNetwork.NodeIDs()[0]).Router, - metrics: consensus.NopMetrics(), - } - - results := make(chan error, 1) - go func() { - _, err := syncer.poolRoutine(ctx, pool, initialState, false) - results <- err - }() - t.Cleanup(func() { - err := <-results - require.ErrorIs(t, err, context.Canceled) - }) - - introducedGoodPeer := false - for { - select { - case err := <-results: - t.Fatalf("poolRoutine returned early after validation failure: %v", err) - case request := <-pool.Requests(): - if request.PeerID == goodPeer { - return - } - - switch request.Height { - case 1: - _ = pool.AddBlock(request.PeerID, block1, block1.Size()) - case 2: - _ = pool.AddBlock(request.PeerID, badBlock2, badBlock2.Size()) - if !introducedGoodPeer { - introducedGoodPeer = true - pool.SetPeerRange(goodPeer, 1, 2) - } - } - } - } -} - -func TestPoolRoutine_RetriesAfterValidationFailure(t *testing.T) { - ctx := t.Context() - - initialState, block1, badBlock2 := makeValidationFailurePair(ctx, t, "block_sync_retry_after_validation_failure") - network := p2p.MakeTestNetwork(t, p2p.TestNetworkOptions{NumNodes: 1}) - - badPeer := types.NodeID(strings.Repeat("a", 40)) - goodPeer1 := types.NodeID(strings.Repeat("b", 40)) - goodPeer2 := types.NodeID(strings.Repeat("c", 40)) - peers := testPeers{ - badPeer: {id: badPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, - goodPeer1: {id: goodPeer1, base: 1, height: 2, inputChan: make(chan inputData, 1)}, - goodPeer2: {id: goodPeer2, base: 1, height: 2, inputChan: make(chan inputData, 1)}, - } - pool := NewBlockPool(1, makeRouter(peers)) - runPoolForTest(t, pool) - pool.SetPeerRange(badPeer, 1, 2) - - syncer := &syncController{ - router: network.Node(network.NodeIDs()[0]).Router, - metrics: consensus.NopMetrics(), - } - - results := make(chan error, 1) - go func() { - _, err := syncer.poolRoutine(ctx, pool, initialState, false) - results <- err - }() - t.Cleanup(func() { - err := <-results - require.ErrorIs(t, err, context.Canceled) - }) - - introducedGoodPeers := false - height1Requests := map[types.NodeID]int{} - - for { - select { - case err := <-results: - t.Fatalf("poolRoutine returned before retry was observed: %v", err) - case request := <-pool.Requests(): - if request.Height == 1 { - height1Requests[request.PeerID]++ - if request.PeerID != badPeer && height1Requests[request.PeerID] == 1 { - return - } - } - - if request.PeerID == badPeer && request.Height == 2 && !introducedGoodPeers { - introducedGoodPeers = true - pool.SetPeerRange(goodPeer1, 1, 2) - pool.SetPeerRange(goodPeer2, 1, 2) - } - - if request.PeerID == badPeer { - switch request.Height { - case 1: - _ = pool.AddBlock(request.PeerID, block1, block1.Size()) - case 2: - _ = pool.AddBlock(request.PeerID, badBlock2, badBlock2.Size()) - } - } - } - } -} - func TestQueryResponder_ServesBlockRequestsWhenBlockSyncDisabled(t *testing.T) { ctx := t.Context() diff --git a/sei-tendermint/internal/blocksync/reactor_validation_failure_test.go b/sei-tendermint/internal/blocksync/reactor_validation_failure_test.go new file mode 100644 index 0000000000..453a797426 --- /dev/null +++ b/sei-tendermint/internal/blocksync/reactor_validation_failure_test.go @@ -0,0 +1,182 @@ +//go:build !mock_chain_validation + +// These tests drive a peer-supplied block whose commit fails verification and +// assert the routine evicts/retries instead of applying it. mock_chain_validation +// swallows the commit-verify failure (ErrLastCommitVerify) and applies the block; +// other builds keep the production eviction/retry path, so only that build is excluded. +package blocksync + +import ( + "context" + "errors" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/sei-protocol/sei-chain/sei-tendermint/config" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/consensus" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/p2p" + sm "github.com/sei-protocol/sei-chain/sei-tendermint/internal/state" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/test/factory" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +func makeValidationFailurePair( + ctx context.Context, + t *testing.T, + testRootName string, +) (sm.State, *types.Block, *types.Block) { + t.Helper() + + cfg, err := config.ResetTestRoot(t.TempDir(), testRootName) + require.NoError(t, err) + + valSet, privVals := factory.ValidatorSet(ctx, 1, 30) + genDoc := factory.GenesisDoc(cfg, time.Now(), valSet.Validators, factory.ConsensusParams()) + initialState, err := sm.MakeGenesisState(genDoc) + require.NoError(t, err) + + lastCommit := &types.Commit{} + block1, _, _, seenCommit1 := makeNextBlock(ctx, t, initialState, privVals[0], 1, lastCommit) + block2, _, _, _ := makeNextBlock(ctx, t, initialState, privVals[0], 2, seenCommit1) + + badBlock2Proto, err := block2.ToProto() + require.NoError(t, err) + badBlock2Proto.LastCommit.Signatures[0].Signature[0] ^= 0xFF + badCommit, err := types.CommitFromProto(badBlock2Proto.LastCommit) + require.NoError(t, err) + badBlock2Proto.Header.LastCommitHash = badCommit.Hash() + badBlock2, err := types.BlockFromProto(badBlock2Proto) + require.NoError(t, err) + + return initialState, block1, badBlock2 +} + +func TestPoolRoutine_DoesNotReturnOnValidationFailure(t *testing.T) { + ctx := t.Context() + + initialState, block1, badBlock2 := makeValidationFailurePair(ctx, t, "block_sync_validation_failure_does_not_return") + + badPeer := types.NodeID(strings.Repeat("a", 40)) + goodPeer := types.NodeID(strings.Repeat("b", 40)) + router := makeRouter(testPeers{ + badPeer: {id: badPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, + goodPeer: {id: goodPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, + }) + pool := NewBlockPool(1, router) + done := make(chan error, 1) + go func() { done <- pool.run(ctx) }() + t.Cleanup(func() { + if err := <-done; err != nil && !errors.Is(err, context.Canceled) { + t.Fatalf("pool.run(): %v", err) + } + }) + pool.SetPeerRange(badPeer, 1, 2) + + evictNetwork := p2p.MakeTestNetwork(t, p2p.TestNetworkOptions{NumNodes: 1}) + syncer := &syncController{ + router: evictNetwork.Node(evictNetwork.NodeIDs()[0]).Router, + metrics: consensus.NopMetrics(), + } + + results := make(chan error, 1) + go func() { + _, err := syncer.poolRoutine(ctx, pool, initialState, false) + results <- err + }() + t.Cleanup(func() { + err := <-results + require.ErrorIs(t, err, context.Canceled) + }) + + introducedGoodPeer := false + for { + select { + case err := <-results: + t.Fatalf("poolRoutine returned early after validation failure: %v", err) + case request := <-pool.Requests(): + if request.PeerID == goodPeer { + return + } + + switch request.Height { + case 1: + _ = pool.AddBlock(request.PeerID, block1, block1.Size()) + case 2: + _ = pool.AddBlock(request.PeerID, badBlock2, badBlock2.Size()) + if !introducedGoodPeer { + introducedGoodPeer = true + pool.SetPeerRange(goodPeer, 1, 2) + } + } + } + } +} + +func TestPoolRoutine_RetriesAfterValidationFailure(t *testing.T) { + ctx := t.Context() + + initialState, block1, badBlock2 := makeValidationFailurePair(ctx, t, "block_sync_retry_after_validation_failure") + network := p2p.MakeTestNetwork(t, p2p.TestNetworkOptions{NumNodes: 1}) + + badPeer := types.NodeID(strings.Repeat("a", 40)) + goodPeer1 := types.NodeID(strings.Repeat("b", 40)) + goodPeer2 := types.NodeID(strings.Repeat("c", 40)) + peers := testPeers{ + badPeer: {id: badPeer, base: 1, height: 2, inputChan: make(chan inputData, 1)}, + goodPeer1: {id: goodPeer1, base: 1, height: 2, inputChan: make(chan inputData, 1)}, + goodPeer2: {id: goodPeer2, base: 1, height: 2, inputChan: make(chan inputData, 1)}, + } + pool := NewBlockPool(1, makeRouter(peers)) + runPoolForTest(t, pool) + pool.SetPeerRange(badPeer, 1, 2) + + syncer := &syncController{ + router: network.Node(network.NodeIDs()[0]).Router, + metrics: consensus.NopMetrics(), + } + + results := make(chan error, 1) + go func() { + _, err := syncer.poolRoutine(ctx, pool, initialState, false) + results <- err + }() + t.Cleanup(func() { + err := <-results + require.ErrorIs(t, err, context.Canceled) + }) + + introducedGoodPeers := false + height1Requests := map[types.NodeID]int{} + + for { + select { + case err := <-results: + t.Fatalf("poolRoutine returned before retry was observed: %v", err) + case request := <-pool.Requests(): + if request.Height == 1 { + height1Requests[request.PeerID]++ + if request.PeerID != badPeer && height1Requests[request.PeerID] == 1 { + return + } + } + + if request.PeerID == badPeer && request.Height == 2 && !introducedGoodPeers { + introducedGoodPeers = true + pool.SetPeerRange(goodPeer1, 1, 2) + pool.SetPeerRange(goodPeer2, 1, 2) + } + + if request.PeerID == badPeer { + switch request.Height { + case 1: + _ = pool.AddBlock(request.PeerID, block1, block1.Size()) + case 2: + _ = pool.AddBlock(request.PeerID, badBlock2, badBlock2.Size()) + } + } + } + } +} diff --git a/sei-tendermint/internal/consensus/state_badproposal_default_test.go b/sei-tendermint/internal/consensus/state_badproposal_default_test.go new file mode 100644 index 0000000000..ab4cf29891 --- /dev/null +++ b/sei-tendermint/internal/consensus/state_badproposal_default_test.go @@ -0,0 +1,63 @@ +//go:build !mock_chain_validation && !mock_block_validation + +// A proposal carrying a bad AppHash is rejected and prevoted nil only in the +// default build; a mock validation build swallows the AppHash mismatch and +// prevotes for the block, so this assertion is default-build only. +package consensus + +import ( + "testing" + + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" + tmproto "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +func TestStateBadProposal(t *testing.T) { + config := configSetup(t) + ctx := t.Context() + + cs1, vss := makeState(ctx, t, makeStateArgs{config: config, validators: 2}) + height, round := cs1.roundState.Height(), cs1.roundState.Round() + vs2 := vss[1] + + partSize := types.BlockPartSizeBytes + proposalCh := subscribe(ctx, t, cs1.eventBus, types.EventQueryCompleteProposal) + voteCh := subscribe(ctx, t, cs1.eventBus, types.EventQueryVote) + + propBlock, err := cs1.createProposalBlock(ctx) + require.NoError(t, err) + + round++ + incrementRound(vss[1:]...) + + stateHash := propBlock.AppHash + if len(stateHash) == 0 { + stateHash = make([]byte, 32) + } + stateHash[0] = (stateHash[0] + 1) % 255 + propBlock.AppHash = stateHash + propBlockParts, err := propBlock.MakePartSet(partSize) + require.NoError(t, err) + blockID := types.BlockID{Hash: propBlock.Hash(), PartSetHeader: propBlockParts.Header()} + pubKey, err := vss[1].PrivValidator.GetPubKey(ctx) + require.NoError(t, err) + proposal := types.NewProposal(vs2.Height, round, -1, blockID, propBlock.Header.Time, propBlock.GetTxHashes(), propBlock.Header, propBlock.LastCommit, propBlock.Evidence, pubKey.Address()) + p := proposal.ToProto() + require.NoError(t, vs2.SignProposal(ctx, config.ChainID(), p)) + proposal.Signature = utils.OrPanic1(crypto.SigFromBytes(p.Signature)) + + err = cs1.SetProposalAndBlock(ctx, proposal, propBlock, propBlockParts, "some peer") + require.NoError(t, err) + + cs1.startTestRound(ctx, height, round) + ensureProposal(t, proposalCh, height, round, blockID) + ensurePrevoteMatch(t, voteCh, height, round, nil) + cs1.signAddVotes(ctx, t, tmproto.PrevoteType, config.ChainID(), blockID, vs2) + ensurePrevote(t, voteCh, height, round) + ensurePrecommit(t, voteCh, height, round) + cs1.validatePrecommit(ctx, t, round, -1, vss[0], nil, nil) + cs1.signAddVotes(ctx, t, tmproto.PrecommitType, config.ChainID(), blockID, vs2) +} diff --git a/sei-tendermint/internal/consensus/state_test.go b/sei-tendermint/internal/consensus/state_test.go index 1eedfd7ecf..c38a9c4a5c 100644 --- a/sei-tendermint/internal/consensus/state_test.go +++ b/sei-tendermint/internal/consensus/state_test.go @@ -201,53 +201,6 @@ func TestStateEnterProposeYesPrivValidator(t *testing.T) { ensureNoNewTimeout(t, timeoutCh, cs.state.ConsensusParams.Timeout.ProposeTimeout(round).Nanoseconds()) } -func TestStateBadProposal(t *testing.T) { - config := configSetup(t) - ctx := t.Context() - - cs1, vss := makeState(ctx, t, makeStateArgs{config: config, validators: 2}) - height, round := cs1.roundState.Height(), cs1.roundState.Round() - vs2 := vss[1] - - partSize := types.BlockPartSizeBytes - proposalCh := subscribe(ctx, t, cs1.eventBus, types.EventQueryCompleteProposal) - voteCh := subscribe(ctx, t, cs1.eventBus, types.EventQueryVote) - - propBlock, err := cs1.createProposalBlock(ctx) - require.NoError(t, err) - - round++ - incrementRound(vss[1:]...) - - stateHash := propBlock.AppHash - if len(stateHash) == 0 { - stateHash = make([]byte, 32) - } - stateHash[0] = (stateHash[0] + 1) % 255 - propBlock.AppHash = stateHash - propBlockParts, err := propBlock.MakePartSet(partSize) - require.NoError(t, err) - blockID := types.BlockID{Hash: propBlock.Hash(), PartSetHeader: propBlockParts.Header()} - pubKey, err := vss[1].PrivValidator.GetPubKey(ctx) - require.NoError(t, err) - proposal := types.NewProposal(vs2.Height, round, -1, blockID, propBlock.Header.Time, propBlock.GetTxHashes(), propBlock.Header, propBlock.LastCommit, propBlock.Evidence, pubKey.Address()) - p := proposal.ToProto() - require.NoError(t, vs2.SignProposal(ctx, config.ChainID(), p)) - proposal.Signature = utils.OrPanic1(crypto.SigFromBytes(p.Signature)) - - err = cs1.SetProposalAndBlock(ctx, proposal, propBlock, propBlockParts, "some peer") - require.NoError(t, err) - - cs1.startTestRound(ctx, height, round) - ensureProposal(t, proposalCh, height, round, blockID) - ensurePrevoteMatch(t, voteCh, height, round, nil) - cs1.signAddVotes(ctx, t, tmproto.PrevoteType, config.ChainID(), blockID, vs2) - ensurePrevote(t, voteCh, height, round) - ensurePrecommit(t, voteCh, height, round) - cs1.validatePrecommit(ctx, t, round, -1, vss[0], nil, nil) - cs1.signAddVotes(ctx, t, tmproto.PrecommitType, config.ChainID(), blockID, vs2) -} - func TestStateOversizedBlock(t *testing.T) { config := configSetup(t) ctx := t.Context() diff --git a/sei-tendermint/internal/state/execution.go b/sei-tendermint/internal/state/execution.go index e422e2b7a6..23b35dae0a 100644 --- a/sei-tendermint/internal/state/execution.go +++ b/sei-tendermint/internal/state/execution.go @@ -514,21 +514,30 @@ func buildLastCommitInfo(block *types.Block, store Store, initialHeight int64) a valSetLen = len(lastValSet.Validators) ) - // ensure that the size of the validator set in the last commit matches - // the size of the validator set in the state store. + // Route a commit/validator-set size divergence through the policy; if it does + // not halt, the votes below are built best-effort, where the per-index + // Signatures/Validators pairing is only approximate -- acceptable because + // LastCommitInfo feeds staking rewards/downtime, never the EVM state under audit. if commitSize != valSetLen { - panic(fmt.Sprintf( - "commit size (%d) doesn't match validator set length (%d) at height %d\n\n%v\n\n%v", - commitSize, valSetLen, block.Height, block.LastCommit.Signatures, lastValSet.Validators, - )) + mismatch := fmt.Errorf( + "commit size (%d) doesn't match validator set length (%d) at height %d: %w", + commitSize, valSetLen, block.Height, types.ErrLastCommitVerify) + if err := types.DefaultConsensusPolicy().HandleError(mismatch); err != nil { + // Dump the full commit + validator set on the (production) panic path + // only, where it aids post-mortem; the swallow path skips this. + panic(fmt.Errorf("%w\n\n%v\n\n%v", err, block.LastCommit.Signatures, lastValSet.Validators)) + } } - votes := make([]abci.VoteInfo, block.LastCommit.Size()) + votes := make([]abci.VoteInfo, valSetLen) for i, val := range lastValSet.Validators { - commitSig := block.LastCommit.Signatures[i] + signedLastBlock := false + if i < commitSize { + signedLastBlock = block.LastCommit.Signatures[i].BlockIDFlag != types.BlockIDFlagAbsent + } votes[i] = abci.VoteInfo{ Validator: types.TM2PB.Validator(val), - SignedLastBlock: commitSig.BlockIDFlag != types.BlockIDFlagAbsent, + SignedLastBlock: signedLastBlock, } } diff --git a/sei-tendermint/internal/state/execution_lastcommit_tolerate_test.go b/sei-tendermint/internal/state/execution_lastcommit_tolerate_test.go new file mode 100644 index 0000000000..2ad4769ad9 --- /dev/null +++ b/sei-tendermint/internal/state/execution_lastcommit_tolerate_test.go @@ -0,0 +1,51 @@ +//go:build mock_chain_validation + +package state_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" + sm "github.com/sei-protocol/sei-chain/sei-tendermint/internal/state" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +// lastCommitFakeStore implements only LoadValidators; buildLastCommitInfo calls +// nothing else. Embedding the interface leaves the rest nil (panics if touched), +// which keeps the fake honest about what the function under test depends on. +type lastCommitFakeStore struct { + sm.Store + vals *types.ValidatorSet +} + +func (f lastCommitFakeStore) LoadValidators(int64) (*types.ValidatorSet, error) { + return f.vals, nil +} + +// Under mock_chain_validation, buildLastCommitInfo must build best-effort commit +// info rather than panic when the commit size diverges from the validator set. +// Pins: votes sized by the valset, present signatures applied, absent slots not-signed. +func TestBuildLastCommitInfo_ToleratesCommitValSetMismatch(t *testing.T) { + valSet := genValSet(3) + store := lastCommitFakeStore{vals: valSet} + + // Commit at height 1 with only ONE signature vs three validators. + block := &types.Block{ + Header: types.Header{Height: 2}, + LastCommit: &types.Commit{ + Height: 1, + Round: 0, + Signatures: []types.CommitSig{{BlockIDFlag: types.BlockIDFlagCommit}}, + }, + } + + var ci abci.CommitInfo + require.NotPanics(t, func() { ci = sm.BuildLastCommitInfo(block, store, 1) }) + + require.Len(t, ci.Votes, 3, "votes are sized by the validator set, not the (shorter) commit") + require.True(t, ci.Votes[0].SignedLastBlock, "the one present signature is applied") + require.False(t, ci.Votes[1].SignedLastBlock, "validators beyond the commit are not-signed") + require.False(t, ci.Votes[2].SignedLastBlock) +} diff --git a/sei-tendermint/internal/state/export_test.go b/sei-tendermint/internal/state/export_test.go index 363251e714..094ba238b5 100644 --- a/sei-tendermint/internal/state/export_test.go +++ b/sei-tendermint/internal/state/export_test.go @@ -13,3 +13,9 @@ func ValidateValidatorUpdates(abciUpdates []abci.ValidatorUpdate, params types.V // ProposerPriorityHashInterval is the interval constant exposed for testing. const ProposerPriorityHashInterval = proposerPriorityHashInterval + +// BuildLastCommitInfo is an alias for buildLastCommitInfo exported for testing +// the mock_chain_validation best-effort path (commit/validator-set size mismatch). +func BuildLastCommitInfo(block *types.Block, store Store, initialHeight int64) abci.CommitInfo { + return buildLastCommitInfo(block, store, initialHeight) +} diff --git a/sei-tendermint/internal/state/validation_header_default_test.go b/sei-tendermint/internal/state/validation_header_default_test.go new file mode 100644 index 0000000000..f3fd4376c7 --- /dev/null +++ b/sei-tendermint/internal/state/validation_header_default_test.go @@ -0,0 +1,119 @@ +//go:build !mock_chain_validation && !mock_block_validation + +// TestValidateBlockHeader asserts that any header-field defect fails validation. +// Its table includes an AppHash mutation, and AppHash is swallowed by both +// mock_chain_validation and mock_block_validation, so this test is default-build +// only. (validationTestsStopHeight is defined in validation_test.go.) +package state_test + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + dbm "github.com/tendermint/tm-db" + + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" + "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/ed25519" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/eventbus" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" + sm "github.com/sei-protocol/sei-chain/sei-tendermint/internal/state" + statefactory "github.com/sei-protocol/sei-chain/sei-tendermint/internal/state/test/factory" + "github.com/sei-protocol/sei-chain/sei-tendermint/internal/store" + "github.com/sei-protocol/sei-chain/sei-tendermint/types" +) + +func TestValidateBlockHeader(t *testing.T) { + ctx := t.Context() + + app := &testApp{} + + eventBus := eventbus.NewDefault() + require.NoError(t, eventBus.Start(ctx)) + + state, stateDB, privVals := makeState(t, 3, 1) + stateStore := sm.NewStore(stateDB) + proxyApp := proxy.New(app, proxy.NopMetrics()) + mp := makeTxMempool(t, proxyApp) + + blockStore := store.NewBlockStore(dbm.NewMemDB()) + blockExec := sm.NewBlockExecutor( + stateStore, + proxyApp, + mp, + sm.EmptyEvidencePool{}, + blockStore, + eventBus, + sm.NopMetrics(), + types.DefaultConsensusPolicy(), + ) + lastCommit := &types.Commit{} + + // some bad values + wrongHash := crypto.Checksum([]byte("this hash is wrong")).Bytes() + wrongVersion1 := state.Version.Consensus + wrongVersion1.Block += 2 + wrongVersion2 := state.Version.Consensus + wrongVersion2.App += 2 + + // Manipulation of any header field causes failure. + testCases := []struct { + name string + malleateBlock func(block *types.Block) + }{ + {"Version wrong1", func(block *types.Block) { block.Version = wrongVersion1 }}, + {"Version wrong2", func(block *types.Block) { block.Version = wrongVersion2 }}, + {"ChainID wrong", func(block *types.Block) { block.ChainID = "not-the-real-one" }}, + {"Height wrong", func(block *types.Block) { block.Height += 10 }}, + {"Time wrong", func(block *types.Block) { block.Time = block.Time.Add(-time.Second * 1) }}, + + {"LastBlockID wrong", func(block *types.Block) { block.LastBlockID.PartSetHeader.Total += 10 }}, + {"LastCommitHash wrong", func(block *types.Block) { block.LastCommitHash = wrongHash }}, + {"DataHash wrong", func(block *types.Block) { block.DataHash = wrongHash }}, + + {"ValidatorsHash wrong", func(block *types.Block) { block.ValidatorsHash = wrongHash }}, + {"NextValidatorsHash wrong", func(block *types.Block) { block.NextValidatorsHash = wrongHash }}, + {"ConsensusHash wrong", func(block *types.Block) { block.ConsensusHash = wrongHash }}, + {"AppHash wrong", func(block *types.Block) { block.AppHash = wrongHash }}, + {"LastResultsHash wrong", func(block *types.Block) { block.LastResultsHash = wrongHash }}, + + {"EvidenceHash wrong", func(block *types.Block) { block.EvidenceHash = wrongHash }}, + {"Proposer wrong", func(block *types.Block) { + block.ProposerAddress = ed25519.GenerateSecretKey().Public().Address() + }}, + {"Proposer invalid", func(block *types.Block) { block.ProposerAddress = []byte("wrong size") }}, + + {"first LastCommit contains signatures", func(block *types.Block) { + block.LastCommit = &types.Commit{Signatures: []types.CommitSig{types.NewCommitSigAbsent()}} + block.LastCommitHash = block.LastCommit.Hash() + }}, + } + + // Build up state for multiple heights + for height := int64(1); height < validationTestsStopHeight; height++ { + /* + Invalid blocks don't pass + */ + for _, tc := range testCases { + block := statefactory.MakeBlock(state, height, lastCommit) + tc.malleateBlock(block) + err := blockExec.ValidateBlock(ctx, state, block) + t.Logf("%s: %v", tc.name, err) + require.Error(t, err, tc.name) + } + + /* + A good block passes + */ + state, _, lastCommit = makeAndCommitGoodBlock(ctx, t, + state, height, lastCommit, state.Validators.GetProposer().Address, blockExec, privVals, nil) + } + + nextHeight := validationTestsStopHeight + block := statefactory.MakeBlock(state, nextHeight, lastCommit) + state.InitialHeight = nextHeight + 1 + err := blockExec.ValidateBlock(ctx, state, block) + require.Error(t, err, "expected an error when state is ahead of block") + assert.Contains(t, err.Error(), "lower than initial height") +} diff --git a/sei-tendermint/internal/state/validation_test.go b/sei-tendermint/internal/state/validation_test.go index 405e7f1d77..84aeed2c04 100644 --- a/sei-tendermint/internal/state/validation_test.go +++ b/sei-tendermint/internal/state/validation_test.go @@ -1,3 +1,10 @@ +//go:build !mock_chain_validation + +// These tests assert that block validation halts on a bad commit or excess +// evidence. mock_chain_validation swallows those sentinels (ErrLastCommitVerify, +// ErrTooMuchEvidence) -- mock_block_validation does not -- so they are excluded +// only under that build. TestValidateBlockHeader exercises a sentinel both mock +// builds swallow and lives in validation_header_default_test.go. package state_test import ( @@ -5,14 +12,12 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" dbm "github.com/tendermint/tm-db" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/sei-tendermint/crypto" - "github.com/sei-protocol/sei-chain/sei-tendermint/crypto/ed25519" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/eventbus" "github.com/sei-protocol/sei-chain/sei-tendermint/internal/proxy" sm "github.com/sei-protocol/sei-chain/sei-tendermint/internal/state" @@ -28,100 +33,6 @@ import ( const validationTestsStopHeight int64 = 10 -func TestValidateBlockHeader(t *testing.T) { - ctx := t.Context() - - app := &testApp{} - - eventBus := eventbus.NewDefault() - require.NoError(t, eventBus.Start(ctx)) - - state, stateDB, privVals := makeState(t, 3, 1) - stateStore := sm.NewStore(stateDB) - proxyApp := proxy.New(app, proxy.NopMetrics()) - mp := makeTxMempool(t, proxyApp) - - blockStore := store.NewBlockStore(dbm.NewMemDB()) - blockExec := sm.NewBlockExecutor( - stateStore, - proxyApp, - mp, - sm.EmptyEvidencePool{}, - blockStore, - eventBus, - sm.NopMetrics(), - types.DefaultConsensusPolicy(), - ) - lastCommit := &types.Commit{} - - // some bad values - wrongHash := crypto.Checksum([]byte("this hash is wrong")).Bytes() - wrongVersion1 := state.Version.Consensus - wrongVersion1.Block += 2 - wrongVersion2 := state.Version.Consensus - wrongVersion2.App += 2 - - // Manipulation of any header field causes failure. - testCases := []struct { - name string - malleateBlock func(block *types.Block) - }{ - {"Version wrong1", func(block *types.Block) { block.Version = wrongVersion1 }}, - {"Version wrong2", func(block *types.Block) { block.Version = wrongVersion2 }}, - {"ChainID wrong", func(block *types.Block) { block.ChainID = "not-the-real-one" }}, - {"Height wrong", func(block *types.Block) { block.Height += 10 }}, - {"Time wrong", func(block *types.Block) { block.Time = block.Time.Add(-time.Second * 1) }}, - - {"LastBlockID wrong", func(block *types.Block) { block.LastBlockID.PartSetHeader.Total += 10 }}, - {"LastCommitHash wrong", func(block *types.Block) { block.LastCommitHash = wrongHash }}, - {"DataHash wrong", func(block *types.Block) { block.DataHash = wrongHash }}, - - {"ValidatorsHash wrong", func(block *types.Block) { block.ValidatorsHash = wrongHash }}, - {"NextValidatorsHash wrong", func(block *types.Block) { block.NextValidatorsHash = wrongHash }}, - {"ConsensusHash wrong", func(block *types.Block) { block.ConsensusHash = wrongHash }}, - {"AppHash wrong", func(block *types.Block) { block.AppHash = wrongHash }}, - {"LastResultsHash wrong", func(block *types.Block) { block.LastResultsHash = wrongHash }}, - - {"EvidenceHash wrong", func(block *types.Block) { block.EvidenceHash = wrongHash }}, - {"Proposer wrong", func(block *types.Block) { - block.ProposerAddress = ed25519.GenerateSecretKey().Public().Address() - }}, - {"Proposer invalid", func(block *types.Block) { block.ProposerAddress = []byte("wrong size") }}, - - {"first LastCommit contains signatures", func(block *types.Block) { - block.LastCommit = &types.Commit{Signatures: []types.CommitSig{types.NewCommitSigAbsent()}} - block.LastCommitHash = block.LastCommit.Hash() - }}, - } - - // Build up state for multiple heights - for height := int64(1); height < validationTestsStopHeight; height++ { - /* - Invalid blocks don't pass - */ - for _, tc := range testCases { - block := statefactory.MakeBlock(state, height, lastCommit) - tc.malleateBlock(block) - err := blockExec.ValidateBlock(ctx, state, block) - t.Logf("%s: %v", tc.name, err) - require.Error(t, err, tc.name) - } - - /* - A good block passes - */ - state, _, lastCommit = makeAndCommitGoodBlock(ctx, t, - state, height, lastCommit, state.Validators.GetProposer().Address, blockExec, privVals, nil) - } - - nextHeight := validationTestsStopHeight - block := statefactory.MakeBlock(state, nextHeight, lastCommit) - state.InitialHeight = nextHeight + 1 - err := blockExec.ValidateBlock(ctx, state, block) - require.Error(t, err, "expected an error when state is ahead of block") - assert.Contains(t, err.Error(), "lower than initial height") -} - func TestValidateBlockCommit(t *testing.T) { ctx := t.Context() diff --git a/sei-tendermint/internal/statesync/syncer.go b/sei-tendermint/internal/statesync/syncer.go index 2394425a2f..668c27bd35 100644 --- a/sei-tendermint/internal/statesync/syncer.go +++ b/sei-tendermint/internal/statesync/syncer.go @@ -575,10 +575,15 @@ func (s *syncer) verifyApp(ctx context.Context, snapshot *snapshot, appVersion u } if !bytes.Equal(snapshot.trustedAppHash, resp.LastBlockAppHash) { - logger.Error("appHash verification failed", - "expected", snapshot.trustedAppHash, - "actual", resp.LastBlockAppHash) - return errVerifyFailed + wrapped := fmt.Errorf( + "state-sync appHash mismatch: expected %X, got %X: %w", + snapshot.trustedAppHash, resp.LastBlockAppHash, types.ErrAppHash) + if err := types.DefaultConsensusPolicy().HandleError(wrapped); err != nil { + logger.Error("appHash verification failed", + "expected", snapshot.trustedAppHash, + "actual", resp.LastBlockAppHash) + return errVerifyFailed + } } if uint64(resp.LastBlockHeight) != snapshot.Height { //nolint:gosec // LastBlockHeight is a non-negative block height diff --git a/sei-tendermint/internal/statesync/syncer_test.go b/sei-tendermint/internal/statesync/syncer_test.go index 8067dc5ec4..ded95bb318 100644 --- a/sei-tendermint/internal/statesync/syncer_test.go +++ b/sei-tendermint/internal/statesync/syncer_test.go @@ -742,62 +742,6 @@ func TestSyncer_applyChunks_RejectSenders(t *testing.T) { } } -func TestSyncer_verifyApp(t *testing.T) { - boom := errors.New("boom") - const appVersion = 9 - appVersionMismatchErr := errors.New("app version mismatch. Expected: 9, got: 2") - s := &snapshot{Height: 3, Format: 1, Chunks: 5, Hash: []byte{1, 2, 3}, trustedAppHash: []byte("app_hash")} - - testcases := map[string]struct { - response *abci.ResponseInfo - err error - expectErr error - }{ - "verified": {&abci.ResponseInfo{ - LastBlockHeight: 3, - LastBlockAppHash: []byte("app_hash"), - AppVersion: appVersion, - }, nil, nil}, - "invalid app version": {&abci.ResponseInfo{ - LastBlockHeight: 3, - LastBlockAppHash: []byte("app_hash"), - AppVersion: 2, - }, nil, appVersionMismatchErr}, - "invalid height": {&abci.ResponseInfo{ - LastBlockHeight: 5, - LastBlockAppHash: []byte("app_hash"), - AppVersion: appVersion, - }, nil, errVerifyFailed}, - "invalid hash": {&abci.ResponseInfo{ - LastBlockHeight: 3, - LastBlockAppHash: []byte("xxx"), - AppVersion: appVersion, - }, nil, errVerifyFailed}, - "error": {nil, boom, boom}, - } - - for name, tc := range testcases { - t.Run(name, func(t *testing.T) { - ctx := t.Context() - - rts := setup(t, nil, nil, true) - - app := rts.conn - app.info.Push(func(_ context.Context, req *abci.RequestInfo) (*abci.ResponseInfo, error) { - utils.OrPanic(utils.TestDiff(&version.RequestInfo, req)) - return tc.response, tc.err - }) - err := rts.reactor.syncer.verifyApp(ctx, s, appVersion) - unwrapped := errors.Unwrap(err) - if unwrapped != nil { - err = unwrapped - } - require.Equal(t, tc.expectErr, err) - app.AssertExpectations(t) - }) - } -} - func toABCI(s *snapshot) *abci.Snapshot { return &abci.Snapshot{ Height: s.Height, diff --git a/sei-tendermint/internal/statesync/syncer_verifyapp_default_test.go b/sei-tendermint/internal/statesync/syncer_verifyapp_default_test.go new file mode 100644 index 0000000000..44f7384dbe --- /dev/null +++ b/sei-tendermint/internal/statesync/syncer_verifyapp_default_test.go @@ -0,0 +1,72 @@ +//go:build !mock_chain_validation && !mock_block_validation + +// verifyApp halts on an appHash mismatch only in the default build; a mock +// validation build swallows ErrAppHash, so this assertion is default-build only. +package statesync + +import ( + "context" + "errors" + "testing" + + abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils" + "github.com/sei-protocol/sei-chain/sei-tendermint/libs/utils/require" + "github.com/sei-protocol/sei-chain/sei-tendermint/version" +) + +func TestSyncer_verifyApp(t *testing.T) { + boom := errors.New("boom") + const appVersion = 9 + appVersionMismatchErr := errors.New("app version mismatch. Expected: 9, got: 2") + s := &snapshot{Height: 3, Format: 1, Chunks: 5, Hash: []byte{1, 2, 3}, trustedAppHash: []byte("app_hash")} + + testcases := map[string]struct { + response *abci.ResponseInfo + err error + expectErr error + }{ + "verified": {&abci.ResponseInfo{ + LastBlockHeight: 3, + LastBlockAppHash: []byte("app_hash"), + AppVersion: appVersion, + }, nil, nil}, + "invalid app version": {&abci.ResponseInfo{ + LastBlockHeight: 3, + LastBlockAppHash: []byte("app_hash"), + AppVersion: 2, + }, nil, appVersionMismatchErr}, + "invalid height": {&abci.ResponseInfo{ + LastBlockHeight: 5, + LastBlockAppHash: []byte("app_hash"), + AppVersion: appVersion, + }, nil, errVerifyFailed}, + "invalid hash": {&abci.ResponseInfo{ + LastBlockHeight: 3, + LastBlockAppHash: []byte("xxx"), + AppVersion: appVersion, + }, nil, errVerifyFailed}, + "error": {nil, boom, boom}, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + ctx := t.Context() + + rts := setup(t, nil, nil, true) + + app := rts.conn + app.info.Push(func(_ context.Context, req *abci.RequestInfo) (*abci.ResponseInfo, error) { + utils.OrPanic(utils.TestDiff(&version.RequestInfo, req)) + return tc.response, tc.err + }) + err := rts.reactor.syncer.verifyApp(ctx, s, appVersion) + unwrapped := errors.Unwrap(err) + if unwrapped != nil { + err = unwrapped + } + require.Equal(t, tc.expectErr, err) + app.AssertExpectations(t) + }) + } +} diff --git a/sei-tendermint/types/block_test.go b/sei-tendermint/types/block_test.go index 1f11504f54..7b2fa4d1fe 100644 --- a/sei-tendermint/types/block_test.go +++ b/sei-tendermint/types/block_test.go @@ -75,6 +75,13 @@ func TestBlockValidateBasic(t *testing.T) { require.NoError(t, err) evList := []Evidence{ev} + // ValidateBasic routes its structural checks through the build's ConsensusPolicy. + // Probe the active policy rather than hard-coding, so the table holds under the + // default build and every mock tag. + lastCommitSwallowed := DefaultConsensusPolicy().HandleError(ErrLastCommitHash) == nil + dataHashSwallowed := DefaultConsensusPolicy().HandleError(ErrDataHash) == nil + evidenceHashSwallowed := DefaultConsensusPolicy().HandleError(ErrEvidenceHash) == nil + testCases := []struct { testName string malleateBlock func(*Block) @@ -86,18 +93,20 @@ func TestBlockValidateBasic(t *testing.T) { {"Remove 1/2 the commits", func(blk *Block) { blk.LastCommit.Signatures = commit.Signatures[:commit.Size()/2] blk.LastCommit.hash = nil // clear hash or change wont be noticed - }, true}, + }, !lastCommitSwallowed}, + // A 14-byte value trips the hard 32-byte size check (not the policy-routed + // ErrLastCommitHash), so it halts in every build. {"Remove LastCommitHash", func(blk *Block) { blk.LastCommitHash = []byte("something else") }, true}, {"Tampered Data", func(blk *Block) { blk.Data.Txs[0] = Tx("something else") blk.Data.hash = nil // clear hash or change wont be noticed - }, true}, + }, !dataHashSwallowed}, {"Tampered DataHash", func(blk *Block) { blk.DataHash = tmrand.Bytes(len(blk.DataHash)) - }, true}, + }, !dataHashSwallowed}, {"Tampered EvidenceHash", func(blk *Block) { blk.EvidenceHash = tmrand.Bytes(len(blk.EvidenceHash)) - }, true}, + }, !evidenceHashSwallowed}, {"Incorrect block protocol version", func(blk *Block) { blk.Version.Block = 1 }, true}, diff --git a/sei-tendermint/types/consensus_policy.go b/sei-tendermint/types/consensus_policy.go index 6b83f1ce7e..cc1f4fd2b2 100644 --- a/sei-tendermint/types/consensus_policy.go +++ b/sei-tendermint/types/consensus_policy.go @@ -1,16 +1,19 @@ // Package types — ConsensusPolicy is a zero-sized, build-tag-selected gate // that decides, per validation failure, whether a halting validation failure -// halts (default) or is swallowed (counter incremented, then continued). The -// single method HandleError(err) is declared in exactly one of three per-tag -// files, so each binary compiles in one fixed policy with no runtime branch: +// halts (default) or is swallowed (counter incremented, then continued). Its +// HandleError(err) method is declared in exactly one of three per-tag files, so +// each binary compiles in one fixed policy with no runtime branch: // // default (production) → returns err for every failure; production halting // semantics are unchanged -// mock_block_validation → returns nil for ErrAppHash and ErrDataHash; -// preserves the long-standing behavior of that tag -// mock_chain_validation → returns nil for every swallow-eligible audit-row -// sentinel except ErrLastCommitVerify, excluded to -// avoid a downstream buildLastCommitInfo panic +// mock_block_validation → returns nil for ErrAppHash, ErrDataHash, and +// ErrUpgradeBeforeTrigger +// mock_chain_validation → returns nil for every audit-row sentinel except the +// peer-content-integrity trio (ErrDataHash, +// ErrEvidenceHash, ErrPerEvidenceValidateBasic), which +// still halt; the swallowed set includes +// ErrLastCommitVerify, whose commit/validator-set drift +// buildLastCommitInfo tolerates // // Validation failures are modeled as *ConsensusPolicyError sentinels. Call sites // attach context with idiomatic fmt.Errorf("...: %w", ErrX): wrapping keeps @@ -56,6 +59,9 @@ var ( ErrLastCommitHash = &ConsensusPolicyError{"last commit hash mismatch"} ErrEvidenceHash = &ConsensusPolicyError{"evidence hash mismatch"} ErrPerEvidenceValidateBasic = &ConsensusPolicyError{"evidence failed ValidateBasic"} + // x/upgrade BeginBlocker raises this for a not-yet-reached upgrade the binary + // already handles; swallow-eligible so a replay can run past it. + ErrUpgradeBeforeTrigger = &ConsensusPolicyError{"binary updated before upgrade trigger"} ) // ValidationErrors returns the audit's swallow-eligible sentinel set. @@ -75,5 +81,6 @@ func ValidationErrors() []error { ErrLastCommitHash, ErrEvidenceHash, ErrPerEvidenceValidateBasic, + ErrUpgradeBeforeTrigger, } } diff --git a/sei-tendermint/types/consensus_policy_mock_block_validation.go b/sei-tendermint/types/consensus_policy_mock_block_validation.go index 6f113052ae..36d54f3de3 100644 --- a/sei-tendermint/types/consensus_policy_mock_block_validation.go +++ b/sei-tendermint/types/consensus_policy_mock_block_validation.go @@ -4,14 +4,14 @@ package types import "errors" -// Swallow set is ErrAppHash + ErrDataHash only — these are the two checks the -// mock_block_validation tag has always relaxed; preserving that exact set -// keeps user-visible outcomes under this tag unchanged across the refactor. -// All other audit-row failures halt as in production. +// Swallow set is ErrAppHash + ErrDataHash (the two block-validation checks this +// tag has always relaxed) plus ErrUpgradeBeforeTrigger, which lets a replay run a +// binary that already contains upgrade handlers for heights it has not yet +// reached. All other audit-row failures halt as in production. type ConsensusPolicy struct{} func (ConsensusPolicy) HandleError(err error) error { - if errors.Is(err, ErrAppHash) || errors.Is(err, ErrDataHash) { + if errors.Is(err, ErrAppHash) || errors.Is(err, ErrDataHash) || errors.Is(err, ErrUpgradeBeforeTrigger) { recordUnsafeValidationSkipped(err) return nil } diff --git a/sei-tendermint/types/consensus_policy_mock_block_validation_test.go b/sei-tendermint/types/consensus_policy_mock_block_validation_test.go index eaf11f292e..54c3d1d919 100644 --- a/sei-tendermint/types/consensus_policy_mock_block_validation_test.go +++ b/sei-tendermint/types/consensus_policy_mock_block_validation_test.go @@ -24,6 +24,7 @@ func TestConsensusPolicy_MockBlockValidation_Matrix(t *testing.T) { ErrLastCommitHash: false, ErrEvidenceHash: false, ErrPerEvidenceValidateBasic: false, + ErrUpgradeBeforeTrigger: true, } for _, sentinel := range ValidationErrors() { swallow, ok := swallowExpected[sentinel] diff --git a/sei-tendermint/types/consensus_policy_mock_chain_validation.go b/sei-tendermint/types/consensus_policy_mock_chain_validation.go index b74455969d..9a53a81217 100644 --- a/sei-tendermint/types/consensus_policy_mock_chain_validation.go +++ b/sei-tendermint/types/consensus_policy_mock_chain_validation.go @@ -4,23 +4,32 @@ package types import "errors" -// ConsensusPolicy here swallows every ValidationErrors sentinel except -// ErrLastCommitVerify (excluded — it would panic downstream in -// buildLastCommitInfo). A swallowed failure increments the counter and -// continues; ErrLastCommitVerify halts and is not counted. +// ConsensusPolicy here swallows only the sentinels that drift because this build +// cannot reproduce the migration-affected app state or the validator set it +// replays against; the logical-digest comparator is this build's correctness +// signal, not these checks. +// +// Peer-supplied block-content integrity is deliberately NOT swallowed -- +// ErrDataHash, ErrEvidenceHash, ErrPerEvidenceValidateBasic still halt -- so a +// malformed or lying peer cannot silently poison the audit input. type ConsensusPolicy struct{} -var swallowedErrors = func() []error { - errs := make([]error, 0, len(ValidationErrors())) - for _, e := range ValidationErrors() { - // Excluded — would panic downstream in buildLastCommitInfo. - if e == ErrLastCommitVerify { - continue - } - errs = append(errs, e) - } - return errs -}() +// Allowlist (not "ValidationErrors() minus exclusions"): a sentinel added later +// halts by default until it is shown to drift for migration/validator-set reasons +// and added here — the safe default for a consensus-relaxing build. +var swallowedErrors = []error{ + ErrAppHash, + ErrLastResultsHash, + ErrLastBlockID, + ErrConsensusHash, + ErrValidatorsHash, + ErrNextValidatorsHash, + ErrLastCommitVerify, + ErrLastCommitHash, + ErrProposerNotInValidatorSet, + ErrTooMuchEvidence, + ErrUpgradeBeforeTrigger, +} func (ConsensusPolicy) HandleError(err error) error { for _, e := range swallowedErrors { diff --git a/sei-tendermint/types/consensus_policy_mock_chain_validation_test.go b/sei-tendermint/types/consensus_policy_mock_chain_validation_test.go index ce342b116f..82f9b89833 100644 --- a/sei-tendermint/types/consensus_policy_mock_chain_validation_test.go +++ b/sei-tendermint/types/consensus_policy_mock_chain_validation_test.go @@ -10,18 +10,30 @@ import ( func TestConsensusPolicy_MockChainValidation_SwallowMatrix(t *testing.T) { policy := DefaultConsensusPolicy() - for _, sentinel := range ValidationErrors() { - // A contextual error wrapping the sentinel must match it under errors.Is. - err := fmt.Errorf("validation failed: %w", sentinel) - got := policy.HandleError(err) - if sentinel == ErrLastCommitVerify { - if got != err { - t.Errorf("mock_chain_validation ConsensusPolicy.HandleError(%v) = %v, want the input error (excluded from swallow set)", sentinel, got) - } - continue + + swallowed := make(map[error]bool, len(swallowedErrors)) + for _, e := range swallowedErrors { + swallowed[e] = true + } + + // Semantic guard: peer-supplied block-content integrity must NEVER be in the + // swallow allowlist, so a malformed/lying peer cannot silently poison the + // audit input. This is the load-bearing safety property of this build. + for _, e := range []error{ErrDataHash, ErrEvidenceHash, ErrPerEvidenceValidateBasic} { + if swallowed[e] { + t.Errorf("%v is in the swallow allowlist; peer-supplied content integrity must halt", e) } - if got != nil { - t.Errorf("mock_chain_validation ConsensusPolicy.HandleError(%v) = %v, want nil", sentinel, got) + } + + // HandleError swallows exactly the allowlist; everything else in the audit set + // halts, including any newly added sentinel (the allowlist is halt-by-default). + for _, sentinel := range ValidationErrors() { + got := policy.HandleError(fmt.Errorf("validation failed: %w", sentinel)) + switch { + case swallowed[sentinel] && got != nil: + t.Errorf("HandleError(%v) = %v, want swallowed (nil)", sentinel, got) + case !swallowed[sentinel] && got == nil: + t.Errorf("HandleError(%v) = nil, want HALT (not in the swallow allowlist)", sentinel) } } } diff --git a/sei-tendermint/types/consensus_policy_test.go b/sei-tendermint/types/consensus_policy_test.go index dd6845e5dc..abc1948aa1 100644 --- a/sei-tendermint/types/consensus_policy_test.go +++ b/sei-tendermint/types/consensus_policy_test.go @@ -28,8 +28,8 @@ func TestConsensusPolicy_Default_UnknownErrorReturnsErr(t *testing.T) { func TestValidationErrors_Count(t *testing.T) { got := len(ValidationErrors()) - if got != 13 { - t.Errorf("ValidationErrors() returned %d sentinels, want 13 (per M1.0 audit)", got) + if got != 14 { + t.Errorf("ValidationErrors() returned %d sentinels, want 14", got) } } diff --git a/sei-tendermint/types/policy_metrics.go b/sei-tendermint/types/policy_metrics.go index 163b5fa27a..598465b27f 100644 --- a/sei-tendermint/types/policy_metrics.go +++ b/sei-tendermint/types/policy_metrics.go @@ -53,6 +53,8 @@ func validationLabel(err error) string { return "evidence_hash" case errors.Is(err, ErrPerEvidenceValidateBasic): return "per_evidence_validate_basic" + case errors.Is(err, ErrUpgradeBeforeTrigger): + return "upgrade_before_trigger" default: return "unknown" }