From 62a68cc280935e050ddc25a108603ae61abb318e Mon Sep 17 00:00:00 2001
From: Saleem Rashid
Date: Tue, 21 Apr 2026 15:35:43 +0100
Subject: [PATCH 1/3] store: Don't hold write lock while receiving LTX

Holding the write lock for too long results in "locking protocol"
errors from SQLite. This happens when receiving the LTX takes too long
(e.g. the LTX is large or the connection to the primary is slow).

Before we have the LTX, the only check we perform is that the database
position matches the LTX pre-apply position. We can keep this as a
preflight check (without holding the write lock), then verify it again
once we've received the LTX and acquired the write lock.
---
 store.go | 55 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/store.go b/store.go
index 982261f..3339fac 100644
--- a/store.go
+++ b/store.go
@@ -1522,14 +1522,6 @@ func (s *Store) processLTXStreamFrame(ctx context.Context, frame *LTXStreamFrame
 		TraceLog.Printf("[ProcessLTXStreamFrame.End(%s)]: %s", db.name, errorKeyValue(err))
 	}()
 
-	// Acquire lock unless we are waiting for a database position, in which case,
-	// we already have the lock.
-	guardSet, err := db.AcquireWriteLock(ctx, nil)
-	if err != nil {
-		return err
-	}
-	defer guardSet.Unlock()
-
 	// Skip frame if it already occurred on this node. This can happen if the
 	// replica node created the transaction and forwarded it to the primary.
 	if hdr.NodeID == s.ID() {
@@ -1543,24 +1535,12 @@ func (s *Store) processLTXStreamFrame(ctx context.Context, frame *LTXStreamFrame
 		return nil
 	}
 
-	// If we receive an LTX file while holding the remote HALT lock then the
-	// remote lock must have expired or been released so we can clear it locally.
-	//
-	// We also hold the local WRITE lock so a local write cannot be in-progress.
-	if haltLock := db.RemoteHaltLock(); haltLock != nil {
-		TraceLog.Printf("[ProcessLTXStreamFrame.Unhalt(%s)]: replica holds HALT lock but received LTX file, unsetting HALT lock", db.Name())
-		if err := db.UnsetRemoteHaltLock(ctx, haltLock.ID); err != nil {
-			return fmt.Errorf("release remote halt lock: %w", err)
-		}
-	}
-
 	// Verify LTX file pre-apply checksum matches the current database position
-	// unless this is a snapshot, which will overwrite all data.
+	// unless this is a snapshot, which will overwrite all data. This is a preflight
+	// check so we can skip streaming an LTX we can't apply, but we'll need to verify
+	// again after we acquire the write lock.
 	if !hdr.IsSnapshot() {
-		expectedPos := ltx.Pos{
-			TXID:              hdr.MinTXID - 1,
-			PostApplyChecksum: hdr.PreApplyChecksum,
-		}
+		expectedPos := hdr.PreApplyPos()
 		if pos := db.Pos(); pos != expectedPos {
 			return fmt.Errorf("position mismatch on db %q: %s <> %s", db.Name(), pos, expectedPos)
 		}
@@ -1595,6 +1575,33 @@ func (s *Store) processLTXStreamFrame(ctx context.Context, frame *LTXStreamFrame
 	dbLTXCountMetricVec.WithLabelValues(db.Name()).Inc()
 	dbLTXBytesMetricVec.WithLabelValues(db.Name()).Set(float64(n))
 
+	// Acquire lock unless we are waiting for a database position, in which case,
+	// we already have the lock.
+	guardSet, err := db.AcquireWriteLock(ctx, nil)
+	if err != nil {
+		return err
+	}
+	defer guardSet.Unlock()
+
+	// If we receive an LTX file while holding the remote HALT lock then the
+	// remote lock must have expired or been released so we can clear it locally.
+	//
+	// We also hold the local WRITE lock so a local write cannot be in-progress.
+	if haltLock := db.RemoteHaltLock(); haltLock != nil {
+		TraceLog.Printf("[ProcessLTXStreamFrame.Unhalt(%s)]: replica holds HALT lock but received LTX file, unsetting HALT lock", db.Name())
+		if err := db.UnsetRemoteHaltLock(ctx, haltLock.ID); err != nil {
+			return fmt.Errorf("release remote halt lock: %w", err)
+		}
+	}
+
+	// Verify the database position again, now that we're holding the write lock.
+	if !hdr.IsSnapshot() {
+		expectedPos := hdr.PreApplyPos()
+		if pos := db.Pos(); pos != expectedPos {
+			return fmt.Errorf("position mismatch on db %q: %s <> %s", db.Name(), pos, expectedPos)
+		}
+	}
+
 	// Remove other LTX files after a snapshot.
 	if hdr.IsSnapshot() {
 		dir, file := filepath.Split(path)

From 81a86a570463e8e21ec0ea8d2d465a77f2b2dc26 Mon Sep 17 00:00:00 2001
From: Saleem Rashid
Date: Wed, 22 Apr 2026 13:20:06 +0100
Subject: [PATCH 2/3] store: Fix deadlock between AcquireRemoteHaltLock and
 processLTXStreamFrame

Moving the halt lock check until after the LTX has been received
increases the probability of hitting an existing deadlock:

- AcquireRemoteHaltLock stores remoteHaltLock and calls WaitPosExact,
  i.e. it waits for processLTXStreamFrame to process pending LTXs
- processLTXStreamFrame sees remoteHaltLock != nil (this is more likely
  now, since it receives the LTX first)
- It incorrectly assumes the remote lock is stale and tries to clear
  it, since we don't expect new LTXs from the primary while we hold the
  halt lock
- UnsetRemoteHaltLock deadlocks in Recover, because
  processLTXStreamFrame already called AcquireWriteLock
- WaitPosExact deadlocks, because it's waiting for
  processLTXStreamFrame, which is deadlocked

Instead of trying to clear the remote lock locally,
processLTXStreamFrame will perform recovery so that the LTX can be
applied. If the remote lock was stale, we'll now find out only when
committing a transaction, as the primary will reject the remote commit.
Deferring the error is an acceptable trade-off, as the halt lock is
intended to be short-lived.
---
 store.go | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/store.go b/store.go
index 3339fac..b8db821 100644
--- a/store.go
+++ b/store.go
@@ -1583,14 +1583,16 @@ func (s *Store) processLTXStreamFrame(ctx context.Context, frame *LTXStreamFrame
 	}
 	defer guardSet.Unlock()
 
-	// If we receive an LTX file while holding the remote HALT lock then the
-	// remote lock must have expired or been released so we can clear it locally.
+	// If we receive an LTX file while holding the remote HALT lock, then the remote lock
+	// might have expired or been released, and we could clear it locally. However, we might
+	// have recently acquired the remote lock and are waiting to catch up with the primary.
 	//
-	// We also hold the local WRITE lock so a local write cannot be in-progress.
+	// In both cases, we hold the local WRITE lock so a local write cannot be in-progress.
+	// We'll perform recovery so the LTX can be applied, but not clear the remote lock.
 	if haltLock := db.RemoteHaltLock(); haltLock != nil {
-		TraceLog.Printf("[ProcessLTXStreamFrame.Unhalt(%s)]: replica holds HALT lock but received LTX file, unsetting HALT lock", db.Name())
-		if err := db.UnsetRemoteHaltLock(ctx, haltLock.ID); err != nil {
-			return fmt.Errorf("release remote halt lock: %w", err)
+		TraceLog.Printf("[ProcessLTXStreamFrame.Recover(%s)]: replica holds HALT lock but received LTX file, performing recovery", db.Name())
+		if err := db.recover(ctx); err != nil {
+			return fmt.Errorf("recover: %w", err)
 		}
 	}
 

From 68dd6222b9cf820b1529960da2cac114cdf52af6 Mon Sep 17 00:00:00 2001
From: Saleem Rashid
Date: Wed, 22 Apr 2026 14:25:01 +0100
Subject: [PATCH 3/3] mount: Fix race condition in WAL tests

When "PRAGMA journal_mode = WAL" is used on the replica before the
database file exists, SQLite will try to create the database file to
update the header, which fails with "attempt to write a readonly
database".
---
 cmd/litefs/mount_test.go | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/cmd/litefs/mount_test.go b/cmd/litefs/mount_test.go
index 50073c0..976b5ed 100644
--- a/cmd/litefs/mount_test.go
+++ b/cmd/litefs/mount_test.go
@@ -1067,6 +1067,7 @@ func TestMultiNode_WALToJournal(t *testing.T) {
 	waitForPrimary(t, cmd0)
 	cmd1 := runMountCommand(t, newMountCommand(t, t.TempDir(), cmd0))
 	db0 := testingutil.OpenSQLDB(t, filepath.Join(cmd0.Config.FUSE.Dir, "db"))
+	waitForInitialDB(t, "db", cmd1)
 	db1 := testingutil.OpenSQLDB(t, filepath.Join(cmd1.Config.FUSE.Dir, "db"))
 
 	// Switch to WAL mode.
@@ -1110,6 +1111,7 @@ func TestMultiNode_JournalToWAL(t *testing.T) {
 	waitForPrimary(t, cmd0)
 	cmd1 := runMountCommand(t, newMountCommand(t, t.TempDir(), cmd0))
 	db0 := testingutil.OpenSQLDB(t, filepath.Join(cmd0.Config.FUSE.Dir, "db"))
+	waitForInitialDB(t, "db", cmd1)
 	db1 := testingutil.OpenSQLDB(t, filepath.Join(cmd1.Config.FUSE.Dir, "db"))
 
 	// Create a simple table.
@@ -1940,6 +1942,7 @@ func TestMultiNode_Halt(t *testing.T) {
 	waitForPrimary(t, cmd0)
 	cmd1 := runMountCommand(t, newMountCommand(t, t.TempDir(), cmd0))
 	db0 := testingutil.OpenSQLDB(t, filepath.Join(cmd0.Config.FUSE.Dir, "db"))
+	waitForInitialDB(t, "db", cmd1)
 	db1 := testingutil.OpenSQLDB(t, filepath.Join(cmd1.Config.FUSE.Dir, "db"))
 
 	// Create a simple table with a single value.
@@ -2126,6 +2129,7 @@ func TestMultiNode_Handoff(t *testing.T) {
 	waitForPrimary(t, cmd0)
 	cmd1 := runMountCommand(t, newMountCommand(t, t.TempDir(), cmd0))
 	db0 := testingutil.OpenSQLDB(t, filepath.Join(cmd0.Config.FUSE.Dir, "db"))
+	waitForInitialDB(t, "db", cmd1)
 	db1 := testingutil.OpenSQLDB(t, filepath.Join(cmd1.Config.FUSE.Dir, "db"))
 
 	// Create a simple table with a single value.
@@ -3173,6 +3177,30 @@ func waitForBackupSync(tb testing.TB, cmd *main.MountCommand) {
 	})
 }
 
+// waitForInitialDB waits for a database to exist when in WAL mode. It does nothing
+// in journal mode. This is required because opening a non-existent DB in WAL mode
+// will create the DB (to update the header), which will fail with "attempt to write
+// a readonly database" on replicas.
+func waitForInitialDB(tb testing.TB, name string, cmd *main.MountCommand) {
+	tb.Helper()
+
+	if !testingutil.IsWALMode() {
+		return
+	}
+
+	testingutil.RetryUntil(tb, 1*time.Millisecond, 5*time.Second, func() error {
+		tb.Helper()
+
+		if db := cmd.Store.DB(name); db == nil {
+			return fmt.Errorf("no database")
+		}
+
+		tb.Logf("db %q exists", name)
+
+		return nil
+	})
+}
+
 var HTTPClient = &http.Client{
 	Transport: &http2.Transport{
 		AllowHTTP: true,
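
Note on the pattern in PATCH 1/3: do a cheap preflight validation without the
lock, perform the slow transfer unlocked, then re-validate once the lock is
held, since the state may have changed in between. The following is a minimal,
self-contained Go sketch of that shape, not LiteFS code; every name in it
(replica, apply, position) is a hypothetical illustration.

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"sync"
)

// replica stands in for a replicated database; mu stands in for the write
// lock and pos for the replication position (TXID + checksum in LiteFS).
type replica struct {
	mu  sync.Mutex
	pos int
}

func (r *replica) position() int {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.pos
}

// apply receives a transaction payload that is expected to apply at
// expectedPos and applies it.
func (r *replica) apply(src io.Reader, expectedPos int) error {
	// Preflight check WITHOUT the write lock: skip the (potentially slow)
	// transfer entirely if we already know the payload cannot apply.
	if pos := r.position(); pos != expectedPos {
		return fmt.Errorf("position mismatch: %d <> %d", pos, expectedPos)
	}

	// Receive the payload while holding no locks, so readers and writers
	// are not blocked during a large or slow transfer.
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, src); err != nil {
		return err
	}

	// Re-verify under the lock: the position may have moved between the
	// preflight check and lock acquisition.
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.pos != expectedPos {
		return fmt.Errorf("position changed during transfer: %d <> %d", r.pos, expectedPos)
	}

	// ... apply buf to the database here ...
	r.pos++
	return nil
}

func main() {
	r := &replica{}
	if err := r.apply(strings.NewReader("payload"), 0); err != nil {
		panic(err)
	}
	fmt.Println("applied; position is now", r.position())
}

Compared with holding the lock across the whole transfer, the trade-off is
that the second check can fail and waste a completed transfer; keeping the
unlocked preflight check avoids most of that waste.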