From 1ca166ff48d885fb3150ef88ebe83b122c885f27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Nov 2023 08:19:15 +0000 Subject: [PATCH] Bump github.com/nats-io/nats-server/v2 from 2.10.4 to 2.10.5 Bumps [github.com/nats-io/nats-server/v2](https://github.com/nats-io/nats-server) from 2.10.4 to 2.10.5. - [Release notes](https://github.com/nats-io/nats-server/releases) - [Changelog](https://github.com/nats-io/nats-server/blob/main/.goreleaser.yml) - [Commits](https://github.com/nats-io/nats-server/compare/v2.10.4...v2.10.5) --- updated-dependencies: - dependency-name: github.com/nats-io/nats-server/v2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 6 +- go.sum | 11 +- .../nats-io/jwt/v2/operator_claims.go | 6 +- .../nats-io/nats-server/v2/server/auth.go | 2 +- .../nats-io/nats-server/v2/server/client.go | 47 +- .../nats-io/nats-server/v2/server/const.go | 2 +- .../nats-io/nats-server/v2/server/consumer.go | 5 +- .../nats-io/nats-server/v2/server/events.go | 20 +- .../nats-server/v2/server/filestore.go | 520 +++++++++++------- .../nats-server/v2/server/jetstream.go | 5 + .../v2/server/jetstream_cluster.go | 80 +-- .../nats-io/nats-server/v2/server/leafnode.go | 11 +- .../nats-io/nats-server/v2/server/memstore.go | 4 +- .../nats-io/nats-server/v2/server/mqtt.go | 464 ++++++++++------ .../nats-io/nats-server/v2/server/raft.go | 435 +++++++++------ .../nats-io/nats-server/v2/server/stream.go | 101 ++-- .../nats-server/v2/server/websocket.go | 4 +- vendor/modules.txt | 8 +- 18 files changed, 1066 insertions(+), 665 deletions(-) diff --git a/go.mod b/go.mod index d31dc81a4a..daeb87be15 100644 --- a/go.mod +++ b/go.mod @@ -60,7 +60,7 @@ require ( github.com/mitchellh/mapstructure v1.5.0 github.com/mna/pigeon v1.2.1 github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 - github.com/nats-io/nats-server/v2 v2.10.4 + github.com/nats-io/nats-server/v2 v2.10.5 github.com/oklog/run v1.1.0 github.com/olekukonko/tablewriter v0.0.5 github.com/onsi/ginkgo v1.16.5 @@ -272,7 +272,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mschoch/smat v0.2.0 // indirect - github.com/nats-io/jwt/v2 v2.5.2 // indirect + github.com/nats-io/jwt/v2 v2.5.3 // indirect github.com/nats-io/nats.go v1.31.0 // indirect github.com/nats-io/nkeys v0.4.6 // indirect github.com/nats-io/nuid v1.0.1 // indirect @@ -331,7 +331,7 @@ require ( golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/mod v0.13.0 // indirect golang.org/x/sys v0.14.0 // indirect - golang.org/x/time v0.3.0 // indirect + golang.org/x/time v0.4.0 // indirect golang.org/x/tools v0.14.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.8 // indirect diff --git a/go.sum b/go.sum index be5bdb86bf..ab5bb319cd 100644 --- a/go.sum +++ b/go.sum @@ -1739,10 +1739,10 @@ github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOl github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8= -github.com/nats-io/jwt/v2 v2.5.2 
h1:DhGH+nKt+wIkDxM6qnVSKjokq5t59AZV5HRcFW0zJwU= -github.com/nats-io/jwt/v2 v2.5.2/go.mod h1:24BeQtRwxRV8ruvC4CojXlx/WQ/VjuwlYiH+vu/+ibI= -github.com/nats-io/nats-server/v2 v2.10.4 h1:uB9xcwon3tPXWAdmTJqqqC6cie3yuPWHJjjTBgaPNus= -github.com/nats-io/nats-server/v2 v2.10.4/go.mod h1:eWm2JmHP9Lqm2oemB6/XGi0/GwsZwtWf8HIPUsh+9ns= +github.com/nats-io/jwt/v2 v2.5.3 h1:/9SWvzc6hTfamcgXJ3uYRpgj+QuY2aLNqRiqrKcrpEo= +github.com/nats-io/jwt/v2 v2.5.3/go.mod h1:iysuPemFcc7p4IoYots3IuELSI4EDe9Y0bQMe+I3Bf4= +github.com/nats-io/nats-server/v2 v2.10.5 h1:hhWt6m9ja/mNnm6ixc85jCthDaiUFPaeJI79K/MD980= +github.com/nats-io/nats-server/v2 v2.10.5/go.mod h1:xUMTU4kS//SDkJCSvFwN9SyJ9nUuLhSkzB/Qz0dvjjg= github.com/nats-io/nats.go v1.31.0 h1:/WFBHEc/dOKBF6qf1TZhrdEfTmOZ5JzdJ+Y3m6Y/p7E= github.com/nats-io/nats.go v1.31.0/go.mod h1:di3Bm5MLsoB4Bx61CBTsxuarI36WbhAwOm8QrW39+i8= github.com/nats-io/nkeys v0.4.6 h1:IzVe95ru2CT6ta874rt9saQRkWfe2nFj1NtvYSLqMzY= @@ -2548,8 +2548,9 @@ golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20220922220347-f3bd1da661af/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.4.0 h1:Z81tqI5ddIoXDPvVQ7/7CC9TnLM7ubaFG2qXYd5BbYY= +golang.org/x/time v0.4.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/vendor/github.com/nats-io/jwt/v2/operator_claims.go b/vendor/github.com/nats-io/jwt/v2/operator_claims.go index 3835b973e8..673225fa82 100644 --- a/vendor/github.com/nats-io/jwt/v2/operator_claims.go +++ b/vendor/github.com/nats-io/jwt/v2/operator_claims.go @@ -136,8 +136,12 @@ func ValidateOperatorServiceURL(v string) error { return nil case "tls": return nil + case "ws": + return nil + case "wss": + return nil default: - return fmt.Errorf("operator service url %q - protocol not supported (only 'nats' or 'tls' only)", v) + return fmt.Errorf("operator service url %q - protocol not supported (only 'nats', 'tls', 'ws', 'wss' only)", v) } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/auth.go b/vendor/github.com/nats-io/nats-server/v2/server/auth.go index b8e82abe41..7a0f93e217 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/auth.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/auth.go @@ -525,7 +525,7 @@ func processUserPermissionsTemplate(lim jwt.UserPermissionLimits, ujwt *jwt.User for _, valueList := range nArrayCartesianProduct(tagValues...) 
{ b := strings.Builder{} for i, token := range newTokens { - if token == _EMPTY_ { + if token == _EMPTY_ && len(valueList) > 0 { b.WriteString(valueList[0]) valueList = valueList[1:] } else { diff --git a/vendor/github.com/nats-io/nats-server/v2/server/client.go b/vendor/github.com/nats-io/nats-server/v2/server/client.go index 6aba4395ad..72346e6958 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/client.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/client.go @@ -1,4 +1,4 @@ -// Copyright 2012-2022 The NATS Authors +// Copyright 2012-2023 The NATS Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -1340,6 +1340,7 @@ func (c *client) readLoop(pre []byte) { c.Errorf("read error: %v", err) } c.closeConnection(closedStateForErr(err)) + return } else if bufs == nil { continue } @@ -1498,15 +1499,6 @@ func (c *client) collapsePtoNB() (net.Buffers, int64) { return c.out.nb, c.out.pb } -// This will handle the fixup needed on a partial write. -// Assume pending has been already calculated correctly. -func (c *client) handlePartialWrite(pnb net.Buffers) { - if c.isWebsocket() { - c.ws.frames = append(pnb, c.ws.frames...) - return - } -} - // flushOutbound will flush outbound buffer to a client. // Will return true if data was attempted to be written. // Lock must be held @@ -1677,12 +1669,6 @@ func (c *client) flushOutbound() bool { c.ws.fs -= n } - // Check for partial writes - // TODO(dlc) - zero write with no error will cause lost message and the writeloop to spin. - if n != attempted && n > 0 { - c.handlePartialWrite(c.out.nb) - } - // Check that if there is still data to send and writeLoop is in wait, // then we need to signal. if c.out.pb > 0 { @@ -2755,7 +2741,7 @@ func (c *client) processSubEx(subject, queue, bsid []byte, cb msgHandler, noForw return sub, nil } - if err := c.addShadowSubscriptions(acc, sub); err != nil { + if err := c.addShadowSubscriptions(acc, sub, true); err != nil { c.Errorf(err.Error()) } @@ -2782,10 +2768,13 @@ type ime struct { dyn bool } -// If the client's account has stream imports and there are matches for -// this subscription's subject, then add shadow subscriptions in the -// other accounts that export this subject. -func (c *client) addShadowSubscriptions(acc *Account, sub *subscription) error { +// If the client's account has stream imports and there are matches for this +// subscription's subject, then add shadow subscriptions in the other accounts +// that export this subject. +// +// enact=false allows MQTT clients to get the list of shadow subscriptions +// without enacting them, in order to first obtain matching "retained" messages. +func (c *client) addShadowSubscriptions(acc *Account, sub *subscription, enact bool) error { if acc == nil { return ErrMissingAccount } @@ -2888,7 +2877,7 @@ func (c *client) addShadowSubscriptions(acc *Account, sub *subscription) error { for i := 0; i < len(ims); i++ { ime := &ims[i] // We will create a shadow subscription. - nsub, err := c.addShadowSub(sub, ime) + nsub, err := c.addShadowSub(sub, ime, enact) if err != nil { return err } @@ -2905,7 +2894,7 @@ func (c *client) addShadowSubscriptions(acc *Account, sub *subscription) error { } // Add in the shadow subscription. 
-func (c *client) addShadowSub(sub *subscription, ime *ime) (*subscription, error) { +func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscription, error) { im := ime.im nsub := *sub // copy nsub.im = im @@ -2929,6 +2918,11 @@ func (c *client) addShadowSub(sub *subscription, ime *ime) (*subscription, error } } // Else use original subject + + if !enact { + return &nsub, nil + } + c.Debugf("Creating import subscription on %q from account %q", nsub.subject, im.acc.Name) if err := im.acc.sl.Insert(&nsub); err != nil { @@ -3298,9 +3292,12 @@ func (c *client) stalledWait(producer *client) { c.mu.Unlock() defer c.mu.Lock() + delay := time.NewTimer(ttl) + defer delay.Stop() + select { case <-stall: - case <-time.After(ttl): + case <-delay.C: producer.Debugf("Timed out of fast producer stall (%v)", ttl) } } @@ -5045,7 +5042,7 @@ func (c *client) processSubsOnConfigReload(awcsti map[string]struct{}) { oldShadows := sub.shadow sub.shadow = nil c.mu.Unlock() - c.addShadowSubscriptions(acc, sub) + c.addShadowSubscriptions(acc, sub, true) for _, nsub := range oldShadows { nsub.im.acc.sl.Remove(nsub) } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/const.go b/vendor/github.com/nats-io/nats-server/v2/server/const.go index 603a3ae44d..4295a4304c 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/const.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/const.go @@ -41,7 +41,7 @@ var ( const ( // VERSION is the current version for the server. - VERSION = "2.10.4" + VERSION = "2.10.5" // PROTO is the currently supported protocol. // 0 was the original diff --git a/vendor/github.com/nats-io/nats-server/v2/server/consumer.go b/vendor/github.com/nats-io/nats-server/v2/server/consumer.go index 1f96e772e8..6953abf518 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/consumer.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/consumer.go @@ -3731,7 +3731,8 @@ func (o *consumer) processInboundAcks(qch chan struct{}) { // How often we will check for ack floor drift. // Spread these out for large numbers on a server restart. delta := time.Duration(rand.Int63n(int64(time.Minute))) - var ackFloorCheck = time.Minute + delta + ticker := time.NewTicker(time.Minute + delta) + defer ticker.Stop() for { select { @@ -3746,7 +3747,7 @@ func (o *consumer) processInboundAcks(qch chan struct{}) { if hasInactiveThresh { o.suppressDeletion() } - case <-time.After(ackFloorCheck): + case <-ticker.C: o.checkAckFloor() case <-qch: return diff --git a/vendor/github.com/nats-io/nats-server/v2/server/events.go b/vendor/github.com/nats-io/nats-server/v2/server/events.go index 601ed85a0d..66d744d451 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/events.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/events.go @@ -810,8 +810,26 @@ func (s *Server) sendStatsz(subj string) { return } + shouldCheckInterest := func() bool { + opts := s.getOpts() + if opts.Cluster.Port != 0 || opts.Gateway.Port != 0 || opts.LeafNode.Port != 0 { + return false + } + // If we are here we have no clustering or gateways and are not a leafnode hub. + // Check for leafnode remotes that connect the system account. + if len(opts.LeafNode.Remotes) > 0 { + sysAcc := s.sys.account.GetName() + for _, r := range opts.LeafNode.Remotes { + if r.LocalAccount == sysAcc { + return false + } + } + } + return true + } + // if we are running standalone, check for interest. - if s.standAloneMode() { + if shouldCheckInterest() { // Check if we even have interest in this subject. 
sacc := s.sys.account rr := sacc.sl.Match(subj) diff --git a/vendor/github.com/nats-io/nats-server/v2/server/filestore.go b/vendor/github.com/nats-io/nats-server/v2/server/filestore.go index 3157e6784f..52af94fdb8 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/filestore.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/filestore.go @@ -32,6 +32,7 @@ import ( "os" "path/filepath" "sort" + "strings" "sync" "sync/atomic" "time" @@ -156,6 +157,7 @@ type psi struct { total uint64 fblk uint32 lblk uint32 + subj string } type fileStore struct { @@ -177,6 +179,7 @@ type fileStore struct { bim map[uint32]*msgBlock psim map[string]*psi tsl int + adml int hh hash.Hash64 qch chan struct{} fch chan struct{} @@ -184,6 +187,7 @@ type fileStore struct { cfs []ConsumerStore sips int dirty int + closing bool closed bool fip bool receivedAny bool @@ -272,10 +276,6 @@ const ( newScan = "%d.new" // used to scan index file names. indexScan = "%d.idx" - // to look for orphans - indexScanAll = "*.idx" - // to look for orphans - fssScanAll = "*.fss" // used to store our block encryption key. keyScan = "%d.key" // to look for orphans @@ -456,10 +456,7 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim // Also make sure we get rid of old idx and fss files on return. // Do this in separate go routine vs inline and at end of processing. defer func() { - go func() { - os.RemoveAll(filepath.Join(fs.fcfg.StoreDir, msgDir, indexScanAll)) - os.RemoveAll(filepath.Join(fs.fcfg.StoreDir, msgDir, fssScanAll)) - }() + go fs.cleanupOldMeta() }() // Lock while do enforcements and removals. @@ -524,7 +521,8 @@ func newFileStoreWithCreated(fcfg FileStoreConfig, cfg StreamConfig, created tim } } - fs.syncTmr = time.AfterFunc(fs.fcfg.SyncInterval, fs.syncBlocks) + // Setup our sync timer. + fs.setSyncTimer() // Spin up the go routine that will write out or full state stream index. go fs.flushStreamStateLoop(fs.fch, fs.qch, fs.fsld) @@ -941,10 +939,10 @@ func (mb *msgBlock) ensureLastChecksumLoaded() { // Perform a recover but do not update PSIM. // Lock should be held. func (fs *fileStore) recoverMsgBlockNoSubjectUpdates(index uint32) (*msgBlock, error) { - psim := fs.psim + psim, tsl := fs.psim, fs.tsl fs.psim = nil mb, err := fs.recoverMsgBlock(index) - fs.psim = psim + fs.psim, fs.tsl = psim, tsl return mb, err } @@ -1098,11 +1096,12 @@ func (fs *fileStore) rebuildStateLocked(ld *LostStreamData) { mb.mu.RLock() fs.state.Msgs += mb.msgs fs.state.Bytes += mb.bytes - if fs.state.FirstSeq == 0 || mb.first.seq < fs.state.FirstSeq { - fs.state.FirstSeq = mb.first.seq + fseq := atomic.LoadUint64(&mb.first.seq) + if fs.state.FirstSeq == 0 || fseq < fs.state.FirstSeq { + fs.state.FirstSeq = fseq fs.state.FirstTime = time.Unix(0, mb.first.ts).UTC() } - fs.state.LastSeq = mb.last.seq + fs.state.LastSeq = atomic.LoadUint64(&mb.last.seq) fs.state.LastTime = time.Unix(0, mb.last.ts).UTC() mb.mu.RUnlock() } @@ -1223,7 +1222,7 @@ func (mb *msgBlock) rebuildState() (*LostStreamData, []uint64, error) { // Rebuild the state of the blk based on what we have on disk in the N.blk file. // Lock should be held. func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { - startLastSeq := mb.last.seq + startLastSeq := atomic.LoadUint64(&mb.last.seq) // Remove the .fss file and clear any cache we have set. mb.clearCacheAndOffset() @@ -1237,7 +1236,8 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { if mb.msgs > 0 { // We need to declare lost data here. 
ld = &LostStreamData{Msgs: make([]uint64, 0, mb.msgs), Bytes: mb.bytes} - for seq := mb.first.seq; seq <= mb.last.seq; seq++ { + firstSeq, lastSeq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq) + for seq := firstSeq; seq <= lastSeq; seq++ { if !mb.dmap.Exists(seq) { ld.Msgs = append(ld.Msgs, seq) } @@ -1245,14 +1245,15 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { // Clear invalid state. We will let this blk be added in here. mb.msgs, mb.bytes, mb.rbytes, mb.fss = 0, 0, 0, nil mb.dmap.Empty() - mb.first.seq = mb.last.seq + 1 + atomic.StoreUint64(&mb.first.seq, atomic.LoadUint64(&mb.last.seq)+1) } return ld, nil, err } // Clear state we need to rebuild. mb.msgs, mb.bytes, mb.rbytes, mb.fss = 0, 0, 0, nil - mb.last.seq, mb.last.ts = 0, 0 + atomic.StoreUint64(&mb.last.seq, 0) + mb.last.ts = 0 firstNeedsSet := true // Check if we need to decrypt. @@ -1307,7 +1308,7 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { gatherLost := func(lb uint32) *LostStreamData { var ld LostStreamData - for seq := mb.last.seq + 1; seq <= startLastSeq; seq++ { + for seq := atomic.LoadUint64(&mb.last.seq) + 1; seq <= startLastSeq; seq++ { ld.Msgs = append(ld.Msgs, seq) } ld.Bytes = uint64(lb) @@ -1375,18 +1376,20 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { continue } + fseq := atomic.LoadUint64(&mb.first.seq) // This is an old erased message, or a new one that we can track. - if seq == 0 || seq&ebit != 0 || seq < mb.first.seq { + if seq == 0 || seq&ebit != 0 || seq < fseq { seq = seq &^ ebit - if seq >= mb.first.seq { + if seq >= fseq { // Only add to dmap if past recorded first seq and non-zero. if seq != 0 { addToDmap(seq) } - mb.last.seq = seq + atomic.StoreUint64(&mb.last.seq, seq) mb.last.ts = ts if mb.msgs == 0 { - mb.first.seq, mb.first.ts = seq+1, 0 + atomic.StoreUint64(&mb.first.seq, seq+1) + mb.first.ts = 0 } } index += rl @@ -1396,8 +1399,9 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { // This is for when we have index info that adjusts for deleted messages // at the head. So the first.seq will be already set here. If this is larger // replace what we have with this seq. - if firstNeedsSet && seq >= mb.first.seq { - firstNeedsSet, mb.first.seq, mb.first.ts = false, seq, ts + if firstNeedsSet && seq >= fseq { + atomic.StoreUint64(&mb.first.seq, seq) + firstNeedsSet, mb.first.ts = false, ts } if !mb.dmap.Exists(seq) { @@ -1423,7 +1427,7 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { } // Always set last - mb.last.seq = seq + atomic.StoreUint64(&mb.last.seq, seq) mb.last.ts = ts // Advance to next record. @@ -1434,12 +1438,15 @@ func (mb *msgBlock) rebuildStateLocked() (*LostStreamData, []uint64, error) { // Or if we seem to have no messages but had a tombstone, which we use to remember // sequences and timestamps now, use that to properly setup the first and last. 
if mb.msgs == 0 { - if mb.first.seq > 0 { - mb.last.seq = mb.first.seq - 1 - } else if mb.first.seq == 0 && minTombstoneSeq > 0 { - mb.first.seq, mb.first.ts = minTombstoneSeq+1, 0 + fseq := atomic.LoadUint64(&mb.first.seq) + if fseq > 0 { + atomic.StoreUint64(&mb.last.seq, fseq-1) + } else if fseq == 0 && minTombstoneSeq > 0 { + atomic.StoreUint64(&mb.first.seq, minTombstoneSeq+1) + mb.first.ts = 0 if mb.last.seq == 0 { - mb.last.seq, mb.last.ts = minTombstoneSeq, minTombstoneTs + atomic.StoreUint64(&mb.last.seq, minTombstoneSeq) + mb.last.ts = minTombstoneTs } } } @@ -1574,9 +1581,9 @@ func (fs *fileStore) recoverFullState() (rerr error) { fs.warn("Stream state bad subject len (%d)", lsubj) return errCorruptState } - subj := fs.subjString(buf[bi : bi+lsubj]) + subj := string(buf[bi : bi+lsubj]) bi += lsubj - psi := &psi{total: readU64(), fblk: uint32(readU64())} + psi := &psi{total: readU64(), fblk: uint32(readU64()), subj: subj} if psi.total > 1 { psi.lblk = uint32(readU64()) } else { @@ -1597,7 +1604,9 @@ func (fs *fileStore) recoverFullState() (rerr error) { break } mb := fs.initMsgBlock(index) - mb.first.seq, mb.last.seq, mb.msgs, mb.bytes = fseq, lseq, lseq-fseq+1, nbytes + atomic.StoreUint64(&mb.first.seq, fseq) + atomic.StoreUint64(&mb.last.seq, lseq) + mb.msgs, mb.bytes = lseq-fseq+1, nbytes mb.first.ts, mb.last.ts = fts+baseTime, lts+baseTime if numDeleted > 0 { dmap, n, err := avl.Decode(buf[bi:]) @@ -1682,13 +1691,13 @@ func (fs *fileStore) recoverFullState() (rerr error) { return err } if nmb != nil { - // Update top level accounting. - if fs.state.FirstSeq == 0 || nmb.first.seq < fs.state.FirstSeq { - fs.state.FirstSeq = nmb.first.seq + // Update top level accounting + if fseq := atomic.LoadUint64(&nmb.first.seq); fs.state.FirstSeq == 0 || fseq < fs.state.FirstSeq { + fs.state.FirstSeq = fseq fs.state.FirstTime = time.Unix(0, nmb.first.ts).UTC() } - if nmb.last.seq > fs.state.LastSeq { - fs.state.LastSeq = nmb.last.seq + if lseq := atomic.LoadUint64(&nmb.last.seq); lseq > fs.state.LastSeq { + fs.state.LastSeq = lseq fs.state.LastTime = time.Unix(0, nmb.last.ts).UTC() } fs.state.Msgs += nmb.msgs @@ -1714,7 +1723,7 @@ func (fs *fileStore) adjustAccounting(mb, nmb *msgBlock) { // triggered limits exceeded will be handled after the recovery and prior to the stream // being available to the system. var smv StoreMsg - for seq := mb.last.seq + 1; seq <= nmb.last.seq; seq++ { + for seq, lseq := atomic.LoadUint64(&mb.last.seq)+1, atomic.LoadUint64(&nmb.last.seq); seq <= lseq; seq++ { // Lookup the message. If an error will be deleted, so can skip. sm, err := nmb.cacheLookup(seq, &smv) if err != nil { @@ -1730,25 +1739,25 @@ func (fs *fileStore) adjustAccounting(mb, nmb *msgBlock) { info.lblk = nmb.index } } else { - fs.psim[sm.subj] = &psi{total: 1, fblk: nmb.index, lblk: nmb.index} + fs.psim[sm.subj] = &psi{total: 1, fblk: nmb.index, lblk: nmb.index, subj: sm.subj} fs.tsl += len(sm.subj) } } } // Now check to see if we had a higher first for the recovered state mb vs nmb. - if nmb.first.seq < mb.first.seq { + if atomic.LoadUint64(&nmb.first.seq) < atomic.LoadUint64(&mb.first.seq) { // Now set first for nmb. - nmb.first = mb.first + atomic.StoreUint64(&nmb.first.seq, atomic.LoadUint64(&mb.first.seq)) } // Update top level accounting. 
- if fs.state.FirstSeq == 0 || nmb.first.seq < fs.state.FirstSeq { - fs.state.FirstSeq = nmb.first.seq + if fseq := atomic.LoadUint64(&nmb.first.seq); fs.state.FirstSeq == 0 || fseq < fs.state.FirstSeq { + fs.state.FirstSeq = fseq fs.state.FirstTime = time.Unix(0, nmb.first.ts).UTC() } - if nmb.last.seq > fs.state.LastSeq { - fs.state.LastSeq = nmb.last.seq + if lseq := atomic.LoadUint64(&nmb.last.seq); lseq > fs.state.LastSeq { + fs.state.LastSeq = lseq fs.state.LastTime = time.Unix(0, nmb.last.ts).UTC() } } @@ -1792,6 +1801,32 @@ func (mb *msgBlock) lastChecksum() []byte { return lchk[:] } +// This will make sure we clean up old idx and fss files. +func (fs *fileStore) cleanupOldMeta() { + fs.mu.RLock() + mdir := filepath.Join(fs.fcfg.StoreDir, msgDir) + fs.mu.RUnlock() + + f, err := os.Open(mdir) + if err != nil { + return + } + + dirs, _ := f.ReadDir(-1) + f.Close() + + const ( + minLen = 4 + idxSuffix = ".idx" + fssSuffix = ".fss" + ) + for _, fi := range dirs { + if name := fi.Name(); strings.HasSuffix(name, idxSuffix) || strings.HasSuffix(name, fssSuffix) { + os.Remove(filepath.Join(mdir, name)) + } + } +} + func (fs *fileStore) recoverMsgs() error { fs.mu.Lock() defer fs.mu.Unlock() @@ -1836,17 +1871,21 @@ func (fs *fileStore) recoverMsgs() error { fs.removeMsgBlockFromList(mb) continue } - if fs.state.FirstSeq == 0 || mb.first.seq < fs.state.FirstSeq { - fs.state.FirstSeq = mb.first.seq + if fseq := atomic.LoadUint64(&mb.first.seq); fs.state.FirstSeq == 0 || fseq < fs.state.FirstSeq { + fs.state.FirstSeq = fseq if mb.first.ts == 0 { fs.state.FirstTime = time.Time{} } else { fs.state.FirstTime = time.Unix(0, mb.first.ts).UTC() } } - if mb.last.seq > fs.state.LastSeq { - fs.state.LastSeq = mb.last.seq - fs.state.LastTime = time.Unix(0, mb.last.ts).UTC() + if lseq := atomic.LoadUint64(&mb.last.seq); lseq > fs.state.LastSeq { + fs.state.LastSeq = lseq + if mb.last.ts == 0 { + fs.state.LastTime = time.Time{} + } else { + fs.state.LastTime = time.Unix(0, mb.last.ts).UTC() + } } fs.state.Msgs += mb.msgs fs.state.Bytes += mb.bytes @@ -1924,7 +1963,8 @@ func (fs *fileStore) expireMsgsOnRecover() { // If we are the last keep state to remember first/last sequence. // Do this part by hand since not deleting one by one. if mb == fs.lmb { - last = mb.last + last.seq = atomic.LoadUint64(&mb.last.seq) + last.ts = mb.last.ts } // Make sure we do subject cleanup as well. mb.ensurePerSubjectInfoLoaded() @@ -1964,7 +2004,8 @@ func (fs *fileStore) expireMsgsOnRecover() { // Walk messages and remove if expired. mb.ensurePerSubjectInfoLoaded() - for seq := mb.first.seq; seq <= mb.last.seq; seq++ { + fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq) + for seq := fseq; seq <= lseq; seq++ { sm, err := mb.cacheLookup(seq, &smv) // Process interior deleted msgs. if err == errDeletedMsg { @@ -1973,12 +2014,14 @@ func (fs *fileStore) expireMsgsOnRecover() { mb.dmap.Delete(seq) } // Keep this updated just in case since we are removing dmap entries. - mb.first.seq, needNextFirst = seq, true + atomic.StoreUint64(&mb.first.seq, seq) + needNextFirst = true continue } // Break on other errors. if err != nil || sm == nil { - mb.first.seq, needNextFirst = seq, true + atomic.StoreUint64(&mb.first.seq, seq) + needNextFirst = true break } @@ -1986,16 +2029,17 @@ func (fs *fileStore) expireMsgsOnRecover() { // Check for done. 
if minAge < sm.ts { - mb.first.seq, needNextFirst = sm.seq, false - mb.first.seq = sm.seq + atomic.StoreUint64(&mb.first.seq, sm.seq) mb.first.ts = sm.ts + needNextFirst = false nts = sm.ts break } // Delete the message here. if mb.msgs > 0 { - mb.first.seq, needNextFirst = seq, true + atomic.StoreUint64(&mb.first.seq, seq) + needNextFirst = true sz := fileStoreMsgSize(sm.subj, sm.hdr, sm.msg) if sz > mb.bytes { sz = mb.bytes @@ -2099,10 +2143,8 @@ func (fs *fileStore) GetSeqFromTime(t time.Time) uint64 { return lastSeq + 1 } - mb.mu.RLock() - fseq := mb.first.seq - lseq := mb.last.seq - mb.mu.RUnlock() + fseq := atomic.LoadUint64(&mb.first.seq) + lseq := atomic.LoadUint64(&mb.last.seq) var smv StoreMsg @@ -2140,7 +2182,12 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor // Skip scan of mb.fss if number of messages in the block are less than // 1/2 the number of subjects in mb.fss. Or we have a wc and lots of fss entries. const linearScanMaxFSS = 32 - doLinearScan := isAll || 2*int(mb.last.seq-start) < len(mb.fss) || (wc && len(mb.fss) > linearScanMaxFSS) + // Make sure to start at mb.first.seq if fseq < mb.first.seq + if seq := atomic.LoadUint64(&mb.first.seq); seq > fseq { + fseq = seq + } + lseq := atomic.LoadUint64(&mb.last.seq) + doLinearScan := isAll || 2*int(lseq-fseq) < len(mb.fss) || (wc && len(mb.fss) > linearScanMaxFSS) if !doLinearScan { // If we have a wildcard match against all tracked subjects we know about. @@ -2152,7 +2199,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor } } } - fseq = mb.last.seq + 1 + fseq = lseq + 1 for _, subj := range subs { ss := mb.fss[subj] if ss != nil && ss.firstNeedsUpdate { @@ -2169,7 +2216,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor } } - if fseq > mb.last.seq { + if fseq > lseq { return nil, false, ErrStoreMsgNotFound } @@ -2177,13 +2224,13 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor sm = new(StoreMsg) } - for seq := fseq; seq <= mb.last.seq; seq++ { + for seq := fseq; seq <= lseq; seq++ { llseq := mb.llseq fsm, err := mb.cacheLookup(seq, sm) if err != nil { continue } - expireOk := seq == mb.last.seq && mb.llseq == seq + expireOk := seq == lseq && mb.llseq == seq if isAll { return fsm, expireOk, nil } @@ -2221,8 +2268,10 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) ( // First check if we can optimize this part. // This means we want all and the starting sequence was before this block. 
- if isAll && sseq <= mb.first.seq { - return mb.msgs, mb.first.seq, mb.last.seq + if isAll { + if fseq := atomic.LoadUint64(&mb.first.seq); sseq <= fseq { + return mb.msgs, fseq, atomic.LoadUint64(&mb.last.seq) + } } update := func(ss *SimpleState) { @@ -2287,7 +2336,7 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) ( } var smv StoreMsg - for seq := sseq; seq <= mb.last.seq; seq++ { + for seq, lseq := sseq, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ { sm, _ := mb.cacheLookup(seq, &smv) if sm == nil { continue @@ -2554,7 +2603,7 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) mb := fs.blks[i] mb.mu.Lock() var t uint64 - if isAll && sseq <= mb.first.seq { + if isAll && sseq <= atomic.LoadUint64(&mb.first.seq) { if lastPerSubject { mb.ensurePerSubjectInfoLoaded() for subj := range mb.fss { @@ -2607,7 +2656,7 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) shouldExpire = true } var smv StoreMsg - for seq := sseq; seq <= mb.last.seq; seq++ { + for seq, lseq := sseq, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ { if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && (isAll || isMatch(sm.subj)) { t++ } @@ -2679,7 +2728,7 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) var shouldExpire bool mb.mu.Lock() // Check if we should include all of this block in adjusting. If so work with metadata. - if sseq > mb.last.seq { + if sseq > atomic.LoadUint64(&mb.last.seq) { if isAll && !lastPerSubject { adjust += mb.msgs } else { @@ -2707,11 +2756,11 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) shouldExpire = true } - var last = mb.last.seq + var last = atomic.LoadUint64(&mb.last.seq) if sseq < last { last = sseq } - for seq := mb.first.seq; seq < last; seq++ { + for seq := atomic.LoadUint64(&mb.first.seq); seq < last; seq++ { sm, _ := mb.cacheLookup(seq, &smv) if sm == nil { continue @@ -2839,8 +2888,8 @@ func (fs *fileStore) newMsgBlockForWrite() (*msgBlock, error) { ts := time.Now().UnixNano() mb.llts, mb.lwts = 0, ts // Remember our last sequence number. - mb.first.seq = fs.state.LastSeq + 1 - mb.last.seq = fs.state.LastSeq + atomic.StoreUint64(&mb.first.seq, fs.state.LastSeq+1) + atomic.StoreUint64(&mb.last.seq, fs.state.LastSeq) mb.mu.Unlock() // Now do local hash. @@ -2938,8 +2987,8 @@ func (fs *fileStore) storeRawMsg(subj string, hdr, msg []byte, seq uint64, ts in if fs.cfg.MaxMsgs > 0 && fs.state.Msgs >= uint64(fs.cfg.MaxMsgs) && !asl { return ErrMaxMsgs } - if fs.cfg.MaxBytes > 0 && fs.state.Bytes+uint64(len(msg)+len(hdr)) >= uint64(fs.cfg.MaxBytes) { - if !asl || fs.sizeForSeq(fseq) <= len(msg)+len(hdr) { + if fs.cfg.MaxBytes > 0 && fs.state.Bytes+fileStoreMsgSize(subj, hdr, msg) >= uint64(fs.cfg.MaxBytes) { + if !asl || fs.sizeForSeq(fseq) <= int(fileStoreMsgSize(subj, hdr, msg)) { return ErrMaxBytes } } @@ -2960,7 +3009,7 @@ func (fs *fileStore) storeRawMsg(subj string, hdr, msg []byte, seq uint64, ts in } // Adjust top level tracking of per subject msg counts. 
- if len(subj) > 0 { + if len(subj) > 0 && fs.psim != nil { index := fs.lmb.index if info, ok := fs.psim[subj]; ok { info.total++ @@ -2968,7 +3017,7 @@ func (fs *fileStore) storeRawMsg(subj string, hdr, msg []byte, seq uint64, ts in info.lblk = index } } else { - fs.psim[subj] = &psi{total: 1, fblk: index, lblk: index} + fs.psim[subj] = &psi{total: 1, fblk: index, lblk: index, subj: subj} fs.tsl += len(subj) } } @@ -3083,9 +3132,9 @@ func (mb *msgBlock) skipMsg(seq uint64, now time.Time) { mb.mu.Lock() // If we are empty can just do meta. if mb.msgs == 0 { - mb.last.seq = seq + atomic.StoreUint64(&mb.last.seq, seq) mb.last.ts = nowts - mb.first.seq = seq + 1 + atomic.StoreUint64(&mb.first.seq, seq+1) mb.first.ts = nowts } else { needsRecord = true @@ -3343,7 +3392,7 @@ func (fs *fileStore) EraseMsg(seq uint64) (bool, error) { // Convenience function to remove per subject tracking at the filestore level. // Lock should be held. func (fs *fileStore) removePerSubject(subj string) { - if len(subj) == 0 { + if len(subj) == 0 || fs.psim == nil { return } // We do not update sense of fblk here but will do so when we resolve during lookup. @@ -3411,7 +3460,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) ( mb.mu.Lock() // See if we are closed or the sequence number is still relevant. - if mb.closed || seq < mb.first.seq { + if mb.closed || seq < atomic.LoadUint64(&mb.first.seq) { mb.mu.Unlock() fsUnlock() return false, nil @@ -3443,7 +3492,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) ( return false, ErrStoreClosed } mb.mu.Lock() - if mb.closed || seq < mb.first.seq { + if mb.closed || seq < atomic.LoadUint64(&mb.first.seq) { mb.mu.Unlock() fsUnlock() return false, nil @@ -3504,7 +3553,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) ( mb.eraseMsg(seq, int(ri), int(rl)) } - fifo := seq == mb.first.seq + fifo := seq == atomic.LoadUint64(&mb.first.seq) isLastBlock := mb == fs.lmb isEmpty := mb.msgs == 0 @@ -3513,7 +3562,7 @@ func (fs *fileStore) removeMsg(seq uint64, secure, viaLimits, needFSLock bool) ( if !isEmpty { // Can update this one in place. if seq == fs.state.FirstSeq { - fs.state.FirstSeq = mb.first.seq // new one. + fs.state.FirstSeq = atomic.LoadUint64(&mb.first.seq) // new one. if mb.first.ts == 0 { fs.state.FirstTime = time.Time{} } else { @@ -3611,8 +3660,9 @@ func (mb *msgBlock) compact() { var le = binary.LittleEndian var firstSet bool + fseq := atomic.LoadUint64(&mb.first.seq) isDeleted := func(seq uint64) bool { - return seq == 0 || seq&ebit != 0 || seq < mb.first.seq || mb.dmap.Exists(seq) + return seq == 0 || seq&ebit != 0 || mb.dmap.Exists(seq) || seq < fseq } for index, lbuf := uint32(0), uint32(len(buf)); index < lbuf; { @@ -3635,21 +3685,21 @@ func (mb *msgBlock) compact() { // Check for tombstones. if seq&tbit != 0 { // If we are last mb we should consider to keep these unless the tombstone reflects a seq in this mb. - if mb == mb.fs.lmb && seq < mb.first.seq { + if mb == mb.fs.lmb && seq < fseq { nbuf = append(nbuf, buf[index:index+rl]...) } } else { // Normal message here. nbuf = append(nbuf, buf[index:index+rl]...) if !firstSet { - firstSet = true - mb.first.seq = seq + firstSet, fseq = true, seq + atomic.StoreUint64(&mb.first.seq, seq) } } } // Always set last as long as not a tombstone. if seq&tbit == 0 { - mb.last.seq = seq &^ ebit + atomic.StoreUint64(&mb.last.seq, seq&^ebit) } // Advance to next record. 
index += rl @@ -3905,7 +3955,7 @@ func (mb *msgBlock) truncate(sm *StoreMsg) (nmsgs, nbytes uint64, err error) { checkDmap := mb.dmap.Size() > 0 var smv StoreMsg - for seq := mb.last.seq; seq > sm.seq; seq-- { + for seq := atomic.LoadUint64(&mb.last.seq); seq > sm.seq; seq-- { if checkDmap { if mb.dmap.Exists(seq) { // Delete and skip to next. @@ -3992,7 +4042,7 @@ func (mb *msgBlock) truncate(sm *StoreMsg) (nmsgs, nbytes uint64, err error) { } // Update our last msg. - mb.last.seq = sm.seq + atomic.StoreUint64(&mb.last.seq, sm.seq) mb.last.ts = sm.ts // Clear our cache. @@ -4009,15 +4059,16 @@ func (mb *msgBlock) truncate(sm *StoreMsg) (nmsgs, nbytes uint64, err error) { return purged, bytes, nil } -// Lock should be held. +// Helper to determine if the mb is empty. func (mb *msgBlock) isEmpty() bool { - return mb.first.seq > mb.last.seq + return atomic.LoadUint64(&mb.first.seq) > atomic.LoadUint64(&mb.last.seq) } // Lock should be held. func (mb *msgBlock) selectNextFirst() { var seq uint64 - for seq = mb.first.seq + 1; seq <= mb.last.seq; seq++ { + fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq) + for seq = fseq + 1; seq <= lseq; seq++ { if mb.dmap.Exists(seq) { // We will move past this so we can delete the entry. mb.dmap.Delete(seq) @@ -4026,10 +4077,10 @@ func (mb *msgBlock) selectNextFirst() { } } // Set new first sequence. - mb.first.seq = seq + atomic.StoreUint64(&mb.first.seq, seq) // Check if we are empty.. - if mb.isEmpty() { + if seq > lseq { mb.first.ts = 0 return } @@ -4057,7 +4108,7 @@ func (fs *fileStore) selectNextFirst() { if len(fs.blks) > 0 { mb := fs.blks[0] mb.mu.RLock() - fs.state.FirstSeq = mb.first.seq + fs.state.FirstSeq = atomic.LoadUint64(&mb.first.seq) fs.state.FirstTime = time.Unix(0, mb.first.ts).UTC() mb.mu.RUnlock() } else { @@ -4307,7 +4358,7 @@ func (fs *fileStore) checkMsgs() *LostStreamData { // FIXME(dlc) - check tombstones here too? if ld, _, err := mb.rebuildState(); err != nil && ld != nil { // Rebuild fs state too. - mb.fs.rebuildStateLocked(ld) + fs.rebuildStateLocked(ld) } fs.populateGlobalPerSubjectInfo(mb) } @@ -4545,8 +4596,9 @@ func (mb *msgBlock) updateAccounting(seq uint64, ts int64, rl uint64) { seq = seq &^ ebit } - if (mb.first.seq == 0 || mb.first.ts == 0) && seq >= mb.first.seq { - mb.first.seq = seq + fseq := atomic.LoadUint64(&mb.first.seq) + if (fseq == 0 || mb.first.ts == 0) && seq >= fseq { + atomic.StoreUint64(&mb.first.seq, seq) mb.first.ts = ts } // Need atomics here for selectMsgBlock speed. @@ -4785,8 +4837,9 @@ func (fs *fileStore) syncBlocks() { } // Check if we should compact here as well. // Do not compact last mb. + var needsCompact bool if mb != lmb && mb.ensureRawBytesLoaded() == nil && mb.rbytes > mb.bytes { - mb.compact() + needsCompact = true markDirty = true } @@ -4798,6 +4851,16 @@ func (fs *fileStore) syncBlocks() { } mb.mu.Unlock() + // Check if we should compact here. + // Need to hold fs lock in case we reference psim when loading in the mb. + if needsCompact { + fs.mu.RLock() + mb.mu.Lock() + mb.compact() + mb.mu.Unlock() + fs.mu.RUnlock() + } + // Check if we need to sync. // This is done not holding any locks. 
if needSync { @@ -4815,7 +4878,11 @@ func (fs *fileStore) syncBlocks() { } fs.mu.Lock() - fs.syncTmr = time.AfterFunc(fs.fcfg.SyncInterval, fs.syncBlocks) + if fs.closed { + fs.mu.Unlock() + return + } + fs.setSyncTimer() fn := filepath.Join(fs.fcfg.StoreDir, msgDir, streamStreamStateFile) syncAlways := fs.fcfg.SyncAlways if markDirty { @@ -4911,9 +4978,11 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error { var idx []uint32 var index uint32 + mbFirstSeq := atomic.LoadUint64(&mb.first.seq) + if mb.cache == nil { // Approximation, may adjust below. - fseq = mb.first.seq + fseq = mbFirstSeq idx = make([]uint32, 0, mb.msgs) mb.cache = &cache{} } else { @@ -4962,11 +5031,11 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error { // We defer checksum checks to individual msg cache lookups to amortorize costs and // not introduce latency for first message from a newly loaded block. - if seq >= mb.first.seq { + if seq >= mbFirstSeq { // Track that we do not have holes. - if slot := int(seq - mb.first.seq); slot != len(idx) { + if slot := int(seq - mbFirstSeq); slot != len(idx) { // If we have a hole fill it. - for dseq := mb.first.seq + uint64(len(idx)); dseq < seq; dseq++ { + for dseq := mbFirstSeq + uint64(len(idx)); dseq < seq; dseq++ { idx = append(idx, dbit) mb.dmap.Insert(dseq) } @@ -4991,8 +5060,7 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error { ss.Msgs++ ss.Last = seq } else { - subj := mb.subjString(bsubj) - mb.fss[subj] = &SimpleState{Msgs: 1, First: seq, Last: seq} + mb.fss[mb.subjString(bsubj)] = &SimpleState{Msgs: 1, First: seq, Last: seq} } } } @@ -5163,7 +5231,7 @@ func (mb *msgBlock) cacheAlreadyLoaded() bool { if mb.cache == nil || mb.cache.off != 0 || mb.cache.fseq == 0 || len(mb.cache.buf) == 0 { return false } - numEntries := mb.msgs + uint64(mb.dmap.Size()) + (mb.first.seq - mb.cache.fseq) + numEntries := mb.msgs + uint64(mb.dmap.Size()) + (atomic.LoadUint64(&mb.first.seq) - mb.cache.fseq) return numEntries == uint64(len(mb.cache.idx)) } @@ -5334,7 +5402,7 @@ func (mb *msgBlock) fetchMsg(seq uint64, sm *StoreMsg) (*StoreMsg, bool, error) if err != nil { return nil, false, err } - expireOk := seq == mb.last.seq && mb.llseq == seq + expireOk := seq == atomic.LoadUint64(&mb.last.seq) && mb.llseq == seq return fsm, expireOk, err } @@ -5372,7 +5440,7 @@ const ( // Will do a lookup from cache. // Lock should be held. func (mb *msgBlock) cacheLookup(seq uint64, sm *StoreMsg) (*StoreMsg, error) { - if seq < mb.first.seq || seq > mb.last.seq { + if seq < atomic.LoadUint64(&mb.first.seq) || seq > atomic.LoadUint64(&mb.last.seq) { return nil, ErrStoreMsgNotFound } @@ -5566,50 +5634,22 @@ func (mb *msgBlock) msgFromBuf(buf []byte, sm *StoreMsg, hh hash.Hash64) (*Store return sm, nil } -// Used to intern strings for subjects. -// Based on idea from https://github.com/josharian/intern/blob/master/intern.go -var subjPool = sync.Pool{ - New: func() any { - return make(map[string]string) - }, -} - -// Get an interned string from a byte slice. -func subjFromBytes(b []byte) string { - sm := subjPool.Get().(map[string]string) - defer subjPool.Put(sm) - subj, ok := sm[string(b)] - if ok { - return subj - } - s := string(b) - sm[s] = s - return s -} - // Given the `key` byte slice, this function will return the subject // as an interned string of `key` or a configured subject as to minimize memory allocations. +// We used to have a pool structure when we leaned on block fss, which could duplicate subjects. 
+// Now we have fs scoped PSIM that is always present and is already tracking all in-use subjects. // Lock should be held. func (fs *fileStore) subjString(skey []byte) string { if fs == nil || len(skey) == 0 { return _EMPTY_ } - - if lsubjs := len(fs.cfg.Subjects); lsubjs > 0 { - if lsubjs == 1 { - // The cast for the comparison does not make a copy - if string(skey) == fs.cfg.Subjects[0] { - return fs.cfg.Subjects[0] - } - } else { - for _, subj := range fs.cfg.Subjects { - if string(skey) == subj { - return subj - } - } + if len(fs.psim) > 0 { + // Cast in place below to avoid allocation for lookup. + if psi := fs.psim[string(skey)]; psi != nil { + return psi.subj } } - return subjFromBytes(skey) + return string(skey) } // Given the `key` byte slice, this function will return the subject @@ -5667,7 +5707,7 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err } } if l == 0 { - _, _, l = mb.filteredPendingLocked(subj, wc, mb.first.seq) + _, _, l = mb.filteredPendingLocked(subj, wc, atomic.LoadUint64(&mb.first.seq)) } if l > 0 { if mb.cacheNotLoaded() { @@ -5774,14 +5814,14 @@ func (fs *fileStore) State() StreamState { for _, mb := range fs.blks { mb.mu.Lock() - fseq := mb.first.seq + fseq := atomic.LoadUint64(&mb.first.seq) // Account for messages missing from the head. if fseq > cur { for seq := cur; seq < fseq; seq++ { state.Deleted = append(state.Deleted, seq) } } - cur = mb.last.seq + 1 // Expected next first. + cur = atomic.LoadUint64(&mb.last.seq) + 1 // Expected next first. mb.dmap.Range(func(seq uint64) bool { if seq < fseq { @@ -5910,9 +5950,9 @@ func (mb *msgBlock) readIndexInfo() error { } mb.msgs = readCount() mb.bytes = readCount() - mb.first.seq = readSeq() + atomic.StoreUint64(&mb.first.seq, readSeq()) mb.first.ts = readTimeStamp() - mb.last.seq = readSeq() + atomic.StoreUint64(&mb.last.seq, readSeq()) mb.last.ts = readTimeStamp() dmapLen := readCount() @@ -5923,7 +5963,7 @@ func (mb *msgBlock) readIndexInfo() error { } // Check for consistency if accounting. If something is off bail and we will rebuild. - if mb.msgs != (mb.last.seq-mb.first.seq+1)-dmapLen { + if mb.msgs != (atomic.LoadUint64(&mb.last.seq)-atomic.LoadUint64(&mb.first.seq)+1)-dmapLen { os.Remove(ifn) return fmt.Errorf("accounting inconsistent") } @@ -5943,12 +5983,12 @@ func (mb *msgBlock) readIndexInfo() error { mb.dmap = *dmap } else { // This is the old version. - for i := 0; i < int(dmapLen); i++ { + for i, fseq := 0, atomic.LoadUint64(&mb.first.seq); i < int(dmapLen); i++ { seq := readSeq() if seq == 0 { break } - mb.dmap.Insert(seq + mb.first.seq) + mb.dmap.Insert(seq + fseq) } } } @@ -6050,7 +6090,7 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint mb.mu.Unlock() continue } - t, f, l := mb.filteredPendingLocked(subject, wc, mb.first.seq) + t, f, l := mb.filteredPendingLocked(subject, wc, atomic.LoadUint64(&mb.first.seq)) if t == 0 { mb.mu.Unlock() continue @@ -6092,7 +6132,7 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint fs.removePerSubject(sm.subj) // Check for first message. - if seq == mb.first.seq { + if seq == atomic.LoadUint64(&mb.first.seq) { mb.selectNextFirst() if mb.isEmpty() { fs.removeMsgBlock(mb) @@ -6100,7 +6140,7 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint // keep flag set, if set previously firstSeqNeedsUpdate = firstSeqNeedsUpdate || seq == fs.state.FirstSeq } else if seq == fs.state.FirstSeq { - fs.state.FirstSeq = mb.first.seq // new one. 
+ fs.state.FirstSeq = atomic.LoadUint64(&mb.first.seq) // new one. fs.state.FirstTime = time.Unix(0, mb.first.ts).UTC() } } else { @@ -6205,14 +6245,14 @@ func (fs *fileStore) purge(fseq uint64) (uint64, error) { } lmb := fs.lmb - lmb.first.seq = fs.state.FirstSeq - lmb.last.seq = fs.state.LastSeq + atomic.StoreUint64(&lmb.first.seq, fs.state.FirstSeq) + atomic.StoreUint64(&lmb.last.seq, fs.state.LastSeq) lmb.last.ts = fs.state.LastTime.UnixNano() - if fs.lmb.last.seq > 1 { + if lseq := atomic.LoadUint64(&lmb.last.seq); lseq > 1 { // Leave a tombstone so we can remember our starting sequence in case // full state becomes corrupted. - lmb.writeTombstone(fs.lmb.last.seq, fs.lmb.last.ts) + lmb.writeTombstone(lseq, lmb.last.ts) } cb := fs.scb @@ -6275,7 +6315,7 @@ func (fs *fileStore) Compact(seq uint64) (uint64, error) { var isEmpty bool smb.mu.Lock() - if smb.first.seq == seq { + if atomic.LoadUint64(&smb.first.seq) == seq { goto SKIP } @@ -6285,7 +6325,7 @@ func (fs *fileStore) Compact(seq uint64) (uint64, error) { goto SKIP } } - for mseq := smb.first.seq; mseq < seq; mseq++ { + for mseq := atomic.LoadUint64(&smb.first.seq); mseq < seq; mseq++ { sm, err := smb.cacheLookup(mseq, &smv) if err == errDeletedMsg { // Update dmap. @@ -6314,24 +6354,24 @@ func (fs *fileStore) Compact(seq uint64) (uint64, error) { if isEmpty { smb.dirtyCloseWithRemove(true) // Update fs first here as well. - fs.state.FirstSeq = smb.last.seq + 1 + fs.state.FirstSeq = atomic.LoadUint64(&smb.last.seq) + 1 fs.state.FirstTime = time.Time{} deleted++ } else { // Make sure to sync changes. smb.needSync = true // Update fs first seq and time. - smb.first.seq = seq - 1 // Just for start condition for selectNextFirst. + atomic.StoreUint64(&smb.first.seq, seq-1) // Just for start condition for selectNextFirst. smb.selectNextFirst() - fs.state.FirstSeq = smb.first.seq + fs.state.FirstSeq = atomic.LoadUint64(&smb.first.seq) fs.state.FirstTime = time.Unix(0, smb.first.ts).UTC() // Check if we should reclaim the head space from this block. // This will be optimistic only, so don't continue if we encounter any errors here. if smb.rbytes > compactMinimum && smb.bytes*2 < smb.rbytes { var moff uint32 - moff, _, _, err = smb.slotInfo(int(smb.first.seq - smb.cache.fseq)) + moff, _, _, err = smb.slotInfo(int(atomic.LoadUint64(&smb.first.seq) - smb.cache.fseq)) if err != nil || moff >= uint32(len(smb.cache.buf)) { goto SKIP } @@ -6595,12 +6635,12 @@ func (fs *fileStore) removeMsgBlock(mb *msgBlock) { fs.removeMsgBlockFromList(mb) // Check for us being last message block if mb == fs.lmb { - last := mb.last + lseq, lts := atomic.LoadUint64(&mb.last.seq), mb.last.ts // Creating a new message write block requires that the lmb lock is not held. mb.mu.Unlock() // Write the tombstone to remember since this was last block. if lmb, _ := fs.newMsgBlockForWrite(); lmb != nil { - lmb.writeTombstone(last.seq, last.ts) + lmb.writeTombstone(lseq, lts) } mb.mu.Lock() } @@ -6704,7 +6744,7 @@ func (mb *msgBlock) recalculateFirstForSubj(subj string, startSeq uint64, ss *Si } var le = binary.LittleEndian - for slot := startSlot; slot < len(mb.cache.idx); slot++ { + for slot, fseq := startSlot, atomic.LoadUint64(&mb.first.seq); slot < len(mb.cache.idx); slot++ { bi := mb.cache.idx[slot] &^ hbit if bi == dbit { // delete marker so skip. 
@@ -6720,7 +6760,7 @@ func (mb *msgBlock) recalculateFirstForSubj(subj string, startSeq uint64, ss *Si slen := int(le.Uint16(hdr[20:])) if subj == string(buf[msgHdrSize:msgHdrSize+slen]) { seq := le.Uint64(hdr[4:]) - if seq < mb.first.seq || seq&ebit != 0 || mb.dmap.Exists(seq) { + if seq < fseq || seq&ebit != 0 || mb.dmap.Exists(seq) { continue } ss.First = seq @@ -6766,7 +6806,7 @@ func (mb *msgBlock) generatePerSubjectInfo() error { mb.fss = make(map[string]*SimpleState) var smv StoreMsg - fseq, lseq := mb.first.seq, mb.last.seq + fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq) for seq := fseq; seq <= lseq; seq++ { sm, err := mb.cacheLookup(seq, &smv) if err != nil { @@ -6829,7 +6869,7 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) { info.lblk = mb.index } } else { - fs.psim[subj] = &psi{total: ss.Msgs, fblk: mb.index, lblk: mb.index} + fs.psim[subj] = &psi{total: ss.Msgs, fblk: mb.index, lblk: mb.index, subj: subj} fs.tsl += len(subj) } } @@ -6911,18 +6951,41 @@ func (fs *fileStore) Delete() error { return err } - err := os.RemoveAll(fs.fcfg.StoreDir) - if err == nil { - return nil + // Make sure we will not try to recover if killed before removal below completes. + if err := os.Remove(filepath.Join(fs.fcfg.StoreDir, JetStreamMetaFile)); err != nil { + return err } - ttl := time.Now().Add(time.Second) - for time.Now().Before(ttl) { - time.Sleep(10 * time.Millisecond) - if err = os.RemoveAll(fs.fcfg.StoreDir); err == nil { - return nil + // Now move into different directory with "." prefix. + ndir := filepath.Join(filepath.Dir(fs.fcfg.StoreDir), tsep+filepath.Base(fs.fcfg.StoreDir)) + if err := os.Rename(fs.fcfg.StoreDir, ndir); err != nil { + return err + } + // Do this in separate Go routine in case lots of blocks. + // Purge above protects us as does the removal of meta artifacts above. + go func() { + err := os.RemoveAll(ndir) + if err == nil { + return } + ttl := time.Now().Add(time.Second) + for time.Now().Before(ttl) { + time.Sleep(10 * time.Millisecond) + if err = os.RemoveAll(ndir); err == nil { + return + } + } + }() + + return nil +} + +// Lock should be held. +func (fs *fileStore) setSyncTimer() { + if fs.syncTmr != nil { + fs.syncTmr.Reset(fs.fcfg.SyncInterval) + } else { + fs.syncTmr = time.AfterFunc(fs.fcfg.SyncInterval, fs.syncBlocks) } - return err } // Lock should be held. @@ -6942,12 +7005,39 @@ const ( // This will get kicked when we create a new block or when we delete a block in general. // This is also called during Stop(). func (fs *fileStore) flushStreamStateLoop(fch, qch, done chan struct{}) { + // Make sure we do not try to write these out too fast. + const writeThreshold = time.Second * 10 + lastWrite := time.Time{} + + // We will use these to complete the full state write while not doing them too fast. 
+ var dt *time.Timer + var dtc <-chan time.Time + + defer close(done) + for { select { case <-fch: + if elapsed := time.Since(lastWrite); elapsed > writeThreshold { + fs.writeFullState() + lastWrite = time.Now() + if dt != nil { + dt.Stop() + dt, dtc = nil, nil + } + } else if dtc == nil { + fireIn := time.Until(lastWrite.Add(writeThreshold)) + if fireIn < 0 { + fireIn = 100 * time.Millisecond + } + dt = time.NewTimer(fireIn) + dtc = dt.C + } + case <-dtc: fs.writeFullState() + lastWrite = time.Now() + dt, dtc = nil, nil case <-qch: - close(done) return } } @@ -6981,6 +7071,13 @@ func (fs *fileStore) writeFullState() error { return nil } + // We track this through subsequent runs to get an avg per blk used for subsequent runs. + avgDmapLen := fs.adml + // If first time through could be 0 + if avgDmapLen == 0 && ((fs.state.LastSeq-fs.state.FirstSeq+1)-fs.state.Msgs) > 0 { + avgDmapLen = 1024 + } + // For calculating size. numSubjects := len(fs.psim) @@ -6989,10 +7086,20 @@ func (fs *fileStore) writeFullState() error { (binary.MaxVarintLen64 * 6) + // FS data binary.MaxVarintLen64 + fs.tsl + // NumSubjects + total subject length numSubjects*(binary.MaxVarintLen64*4) + // psi record - len(fs.blks)*((binary.MaxVarintLen64*6)+512) + // msg blocks, 512 is est for dmap - binary.MaxVarintLen64 + 8 // last index + checksum + binary.MaxVarintLen64 + // Num blocks. + len(fs.blks)*((binary.MaxVarintLen64*7)+avgDmapLen) + // msg blocks, avgDmapLen is est for dmaps + binary.MaxVarintLen64 + 8 + 8 // last index + record checksum + full state checksum + + // Do 4k on stack if possible. + var raw [4 * 1024]byte + var buf []byte + + if sz <= cap(raw) { + buf, sz = raw[0:2:cap(raw)], cap(raw) + } else { + buf = make([]byte, hdrLen, sz) + } - buf := make([]byte, hdrLen, sz) buf[0], buf[1] = fullStateMagic, fullStateVersion buf = binary.AppendUvarint(buf, fs.state.Msgs) buf = binary.AppendUvarint(buf, fs.state.Bytes) @@ -7026,19 +7133,21 @@ func (fs *fileStore) writeFullState() error { baseTime := timestampNormalized(fs.state.FirstTime) var scratch [8 * 1024]byte + var dmapTotalLen int for _, mb := range fs.blks { mb.mu.RLock() buf = binary.AppendUvarint(buf, uint64(mb.index)) buf = binary.AppendUvarint(buf, mb.bytes) - buf = binary.AppendUvarint(buf, mb.first.seq) + buf = binary.AppendUvarint(buf, atomic.LoadUint64(&mb.first.seq)) buf = binary.AppendVarint(buf, mb.first.ts-baseTime) - buf = binary.AppendUvarint(buf, mb.last.seq) + buf = binary.AppendUvarint(buf, atomic.LoadUint64(&mb.last.seq)) buf = binary.AppendVarint(buf, mb.last.ts-baseTime) numDeleted := mb.dmap.Size() buf = binary.AppendUvarint(buf, uint64(numDeleted)) if numDeleted > 0 { dmap, _ := mb.dmap.Encode(scratch[:0]) + dmapTotalLen += len(dmap) buf = append(buf, dmap...) } // If this is the last one grab the last checksum and the block index, e.g. 22.blk, 22 is the block index. @@ -7050,6 +7159,9 @@ func (fs *fileStore) writeFullState() error { } mb.mu.RUnlock() } + if dmapTotalLen > 0 { + fs.adml = dmapTotalLen / len(fs.blks) + } // Place block index and hash onto the end. buf = binary.AppendUvarint(buf, uint64(lbi)) @@ -7114,11 +7226,15 @@ func (fs *fileStore) writeFullState() error { // Stop the current filestore. func (fs *fileStore) Stop() error { fs.mu.Lock() - if fs.closed { + if fs.closed || fs.closing { fs.mu.Unlock() return ErrStoreClosed } + // Mark as closing. Do before releasing the lock to writeFullState + // so we don't end up with this function running more than once. 
+ fs.closing = true + fs.checkAndFlushAllBlocks() fs.closeAllMsgBlocks(false) @@ -7126,7 +7242,10 @@ func (fs *fileStore) Stop() error { fs.cancelAgeChk() // Release the state flusher loop. - close(fs.qch) + if fs.qch != nil { + close(fs.qch) + fs.qch = nil + } // Wait for the state flush loop to exit. fsld := fs.fsld @@ -7136,7 +7255,8 @@ func (fs *fileStore) Stop() error { fs.writeFullState() fs.mu.Lock() - // Mark as closed. + // Mark as closed. Last message block needs to be cleared after + // writeFullState has completed. fs.closed = true fs.lmb = nil @@ -7468,14 +7588,14 @@ func (fs *fileStore) deleteBlocks() DeleteBlocks { for _, mb := range fs.blks { // Detect if we have a gap between these blocks. - if prevLast > 0 && prevLast+1 != mb.first.seq { - gap := mb.first.seq - prevLast - 1 - dbs = append(dbs, &DeleteRange{First: prevLast + 1, Num: gap}) + fseq := atomic.LoadUint64(&mb.first.seq) + if prevLast > 0 && prevLast+1 != fseq { + dbs = append(dbs, &DeleteRange{First: prevLast + 1, Num: fseq - prevLast - 1}) } if mb.dmap.Size() > 0 { dbs = append(dbs, &mb.dmap) } - prevLast = mb.last.seq + prevLast = atomic.LoadUint64(&mb.last.seq) } return dbs } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go b/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go index e029f4cdf1..cc23fd4b22 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go @@ -1197,6 +1197,11 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro fis, _ := os.ReadDir(sdir) for _, fi := range fis { mdir := filepath.Join(sdir, fi.Name()) + // Check for partially deleted streams. They are marked with "." prefix. + if strings.HasPrefix(fi.Name(), tsep) { + go os.RemoveAll(mdir) + continue + } key := sha256.Sum256([]byte(fi.Name())) hh, err := highwayhash.New64(key[:]) if err != nil { diff --git a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go index b5436facb3..d55b3e0bcd 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go @@ -2143,7 +2143,7 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps // from underneath the one that is running since it will be the same raft node. 
defer n.Stop() - qch, lch, aq, uch, ourPeerId := n.QuitC(), n.LeadChangeC(), n.ApplyQ(), mset.updateC(), meta.ID() + qch, mqch, lch, aq, uch, ourPeerId := n.QuitC(), mset.monitorQuitC(), n.LeadChangeC(), n.ApplyQ(), mset.updateC(), meta.ID() s.Debugf("Starting stream monitor for '%s > %s' [%s]", sa.Client.serviceAccount(), sa.Config.Name, n.Group()) defer s.Debugf("Exiting stream monitor for '%s > %s' [%s]", sa.Client.serviceAccount(), sa.Config.Name, n.Group()) @@ -2249,7 +2249,7 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps startDirectAccessMonitoring := func() { if dat == nil { - dat = time.NewTicker(1 * time.Second) + dat = time.NewTicker(2 * time.Second) datc = dat.C } } @@ -2301,6 +2301,8 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps select { case <-s.quitCh: return + case <-mqch: + return case <-qch: return case <-aq.ch: @@ -2322,6 +2324,10 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps ne, nb = n.Applied(ce.Index) ce.ReturnToPool() } else { + // Our stream was closed out from underneath of us, simply return here. + if err == errStreamClosed { + return + } s.Warnf("Error applying entries to '%s > %s': %v", accName, sa.Config.Name, err) if isClusterResetErr(err) { if mset.isMirror() && mset.IsLeader() { @@ -2349,19 +2355,15 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps case isLeader = <-lch: if isLeader { - if mset != nil && n != nil { - // Send a snapshot if being asked or if we are tracking - // a failed state so that followers sync. - if clfs := mset.clearCLFS(); clfs > 0 || sendSnapshot { - n.SendSnapshot(mset.stateSnapshot()) - sendSnapshot = false - } + if mset != nil && n != nil && sendSnapshot { + n.SendSnapshot(mset.stateSnapshot()) + sendSnapshot = false } if isRestore { acc, _ := s.LookupAccount(sa.Client.serviceAccount()) restoreDoneCh = s.processStreamRestore(sa.Client, acc, sa.Config, _EMPTY_, sa.Reply, _EMPTY_) continue - } else if n.NeedSnapshot() { + } else if n != nil && n.NeedSnapshot() { doSnapshot() } // Always cancel if this was running. @@ -2388,17 +2390,22 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps // Here we are checking if we are not the leader but we have been asked to allow // direct access. We now allow non-leaders to participate in the queue group. if !isLeader && mset != nil { - startDirectAccessMonitoring() + mset.mu.RLock() + ad, md := mset.cfg.AllowDirect, mset.cfg.MirrorDirect + mset.mu.RUnlock() + if ad || md { + startDirectAccessMonitoring() + } } case <-datc: if mset == nil || isRecovering { - return + continue } // If we are leader we can stop, we know this is setup now. if isLeader { stopDirectMonitoring() - return + continue } mset.mu.Lock() @@ -2550,6 +2557,8 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps mset.setStreamAssignment(sa) // Make sure to update our updateC which would have been nil. uch = mset.updateC() + // Also update our mqch + mqch = mset.monitorQuitC() } } if err != nil { @@ -2782,6 +2791,7 @@ func (js *jetStream) applyStreamEntries(mset *stream, ce *CommittedEntry, isReco // Grab last sequence and CLFS. last, clfs := mset.lastSeqAndCLFS() + // We can skip if we know this is less than what we already have. 
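// A sketch of the monitor loop shape after this change: besides the server
// and Raft quit channels, the loop also returns when the per-stream monitor
// quit channel (mqch) closes, so closing the stream reliably stops its
// monitor. Names and the apply callback are illustrative.
func monitorLoop(serverQuit, raftQuit, monitorQuit <-chan struct{}, applyC <-chan *CommittedEntry, apply func(*CommittedEntry)) {
	for {
		select {
		case <-serverQuit:
			return
		case <-monitorQuit:
			return
		case <-raftQuit:
			return
		case ce := <-applyC:
			apply(ce)
		}
	}
}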
if lseq-clfs < last { s.Debugf("Apply stream entries for '%s > %s' skipping message with sequence %d with last of %d", @@ -2812,13 +2822,14 @@ func (js *jetStream) applyStreamEntries(mset *stream, ce *CommittedEntry, isReco // Process the actual message here. if err := mset.processJetStreamMsg(subject, reply, hdr, msg, lseq, ts); err != nil { - // Only return in place if we are going to reset stream or we are out of space. - if isClusterResetErr(err) || isOutOfSpaceErr(err) { + // Only return in place if we are going to reset our stream or we are out of space, or we are closed. + if isClusterResetErr(err) || isOutOfSpaceErr(err) || err == errStreamClosed { return err } s.Debugf("Apply stream entries for '%s > %s' got error processing message: %v", mset.account(), mset.name(), err) } + case deleteMsgOp: md, err := decodeMsgDelete(buf[1:]) if err != nil { @@ -5950,10 +5961,13 @@ func sysRequest[T any](s *Server, subjFormat string, args ...interface{}) (*T, e } }() + ttl := time.NewTimer(2 * time.Second) + defer ttl.Stop() + select { case <-s.quitCh: return nil, errReqSrvExit - case <-time.After(2 * time.Second): + case <-ttl.C: return nil, errReqTimeout case data := <-results: return data, nil @@ -6086,6 +6100,12 @@ func (s *Server) jsClusteredStreamUpdateRequest(ci *ClientInfo, acc *Account, su if isReplicaChange { // We are adding new peers here. if newCfg.Replicas > len(rg.Peers) { + // Check that we have the allocation available. + if err := js.jsClusteredStreamLimitsCheck(acc, newCfg); err != nil { + resp.Error = err + s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) + return + } // Check if we do not have a cluster assigned, and if we do not make sure we // try to pick one. This could happen with older streams that were assigned by // previous servers. @@ -6957,7 +6977,7 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec // Also short circuit if DeliverLastPerSubject is set with no FilterSubject. if cfg.DeliverPolicy == DeliverLastPerSubject { - if cfg.FilterSubject == _EMPTY_ { + if cfg.FilterSubject == _EMPTY_ && len(cfg.FilterSubjects) == 0 { resp.Error = NewJSConsumerInvalidPolicyError(fmt.Errorf("consumer delivery policy is deliver last per subject, but FilterSubject is not set")) s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) return @@ -7382,7 +7402,7 @@ func (mset *stream) stateSnapshotLocked() []byte { Bytes: state.Bytes, FirstSeq: state.FirstSeq, LastSeq: state.LastSeq, - Failed: mset.clfs, + Failed: mset.getCLFS(), Deleted: state.Deleted, } b, _ := json.Marshal(snap) @@ -7419,7 +7439,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [ mset.mu.RLock() canRespond := !mset.cfg.NoAck && len(reply) > 0 - name, stype, store := mset.cfg.Name, mset.cfg.Storage, mset.store + name, stype := mset.cfg.Name, mset.cfg.Storage s, js, jsa, st, rf, tierName, outq, node := mset.srv, mset.js, mset.jsa, mset.cfg.Storage, mset.cfg.Replicas, mset.tier, mset.outq, mset.node maxMsgSize, lseq, clfs := int(mset.cfg.MaxMsgSize), mset.lseq, mset.clfs isLeader, isSealed := mset.isLeader(), mset.cfg.Sealed @@ -7519,26 +7539,6 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [ // Some header checks can be checked pre proposal. Most can not. if len(hdr) > 0 { - // For CAS operations, e.g. ExpectedLastSeqPerSubject, we can also check here and not have to go through. - // Can only precheck for seq != 0. 
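// A sketch of the timeout change made in sysRequest above: replacing
// time.After with an explicit timer plus a deferred Stop releases the timer
// as soon as another case wins, instead of leaving it allocated until it
// fires. The helper name and error values are assumptions.
func awaitResult[T any](results <-chan T, quit <-chan struct{}, d time.Duration) (T, error) {
	var zero T
	ttl := time.NewTimer(d)
	defer ttl.Stop() // release the timer promptly on early return
	select {
	case <-quit:
		return zero, errors.New("server exiting") // stand-in for errReqSrvExit
	case <-ttl.C:
		return zero, errors.New("request timed out") // stand-in for errReqTimeout
	case v := <-results:
		return v, nil
	}
}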
- if seq, exists := getExpectedLastSeqPerSubject(hdr); exists && store != nil && seq > 0 { - var smv StoreMsg - var fseq uint64 - sm, err := store.LoadLastMsg(subject, &smv) - if sm != nil { - fseq = sm.seq - } - if err != nil || fseq != seq { - if canRespond { - var resp = &JSPubAckResponse{PubAck: &PubAck{Stream: name}} - resp.PubAck = &PubAck{Stream: name} - resp.Error = NewJSStreamWrongLastSequenceError(fseq) - b, _ := json.Marshal(resp) - outq.sendMsg(reply, b) - } - return fmt.Errorf("last sequence by subject mismatch: %d vs %d", seq, fseq) - } - } // Expected stream name can also be pre-checked. if sname := getExpectedStream(hdr); sname != _EMPTY_ && sname != name { if canRespond { @@ -7746,8 +7746,8 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) { mset.mu.Lock() var state StreamState - mset.clfs = snap.Failed mset.store.FastState(&state) + mset.setCLFS(snap.Failed) sreq := mset.calculateSyncRequest(&state, snap) s, js, subject, n := mset.srv, mset.js, mset.sa.Sync, mset.node diff --git a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go index d5d41c5336..97fa8b4197 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go @@ -25,6 +25,7 @@ import ( "net/http" "net/url" "os" + "path" "reflect" "regexp" "runtime" @@ -2349,7 +2350,7 @@ func (c *client) processLeafSub(argo []byte) (err error) { // Only add in shadow subs if a new sub or qsub. if osub == nil { - if err := c.addShadowSubscriptions(acc, sub); err != nil { + if err := c.addShadowSubscriptions(acc, sub, true); err != nil { c.Errorf(err.Error()) } } @@ -2784,14 +2785,16 @@ func (c *client) leafNodeSolicitWSConnection(opts *Options, rURL *url.URL, remot // create a LEAF connection, not a CLIENT. // In case we use the user's URL path in the future, make sure we append the user's // path to our `/leafnode` path. 
- path := leafNodeWSPath + lpath := leafNodeWSPath if curPath := rURL.EscapedPath(); curPath != _EMPTY_ { if curPath[0] == '/' { curPath = curPath[1:] } - path += curPath + lpath = path.Join(curPath, lpath) + } else { + lpath = lpath[1:] } - ustr := fmt.Sprintf("%s://%s%s", scheme, rURL.Host, path) + ustr := fmt.Sprintf("%s://%s/%s", scheme, rURL.Host, lpath) u, _ := url.Parse(ustr) req := &http.Request{ Method: "GET", diff --git a/vendor/github.com/nats-io/nats-server/v2/server/memstore.go b/vendor/github.com/nats-io/nats-server/v2/server/memstore.go index 0d037be673..adf660846c 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/memstore.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/memstore.go @@ -129,7 +129,7 @@ func (ms *memStore) storeRawMsg(subj string, hdr, msg []byte, seq uint64, ts int return ErrMaxMsgs } } - if ms.cfg.MaxBytes > 0 && ms.state.Bytes+uint64(len(msg)+len(hdr)) >= uint64(ms.cfg.MaxBytes) { + if ms.cfg.MaxBytes > 0 && ms.state.Bytes+memStoreMsgSize(subj, hdr, msg) >= uint64(ms.cfg.MaxBytes) { if !asl { return ErrMaxBytes } @@ -138,7 +138,7 @@ func (ms *memStore) storeRawMsg(subj string, hdr, msg []byte, seq uint64, ts int ms.recalculateFirstForSubj(subj, ss.First, ss) } sm, ok := ms.msgs[ss.First] - if !ok || memStoreMsgSize(sm.subj, sm.hdr, sm.msg) < uint64(len(msg)+len(hdr)) { + if !ok || memStoreMsgSize(sm.subj, sm.hdr, sm.msg) < memStoreMsgSize(subj, hdr, msg) { return ErrMaxBytes } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go b/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go index 7302722007..b347936f5d 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go @@ -155,6 +155,7 @@ const ( // while "$MQTT.JSA..SL." is for a stream lookup, etc... mqttJSAIdTokenPos = 3 mqttJSATokenPos = 4 + mqttJSAClientIDPos = 5 mqttJSAStreamCreate = "SC" mqttJSAStreamUpdate = "SU" mqttJSAStreamLookup = "SL" @@ -237,10 +238,9 @@ type mqttAccountSessionManager struct { sl *Sublist // sublist allowing to find retained messages for given subscription retmsgs map[string]*mqttRetainedMsgRef // retained messages jsa mqttJSA - rrmLastSeq uint64 // Restore retained messages expected last sequence - rrmDoneCh chan struct{} // To notify the caller that all retained messages have been loaded - sp *ipQueue[uint64] // Used for cluster-wide processing of session records being persisted - domainTk string // Domain (with trailing "."), or possibly empty. This is added to session subject. + rrmLastSeq uint64 // Restore retained messages expected last sequence + rrmDoneCh chan struct{} // To notify the caller that all retained messages have been loaded + domainTk string // Domain (with trailing "."), or possibly empty. This is added to session subject. } type mqttJSA struct { @@ -1109,7 +1109,6 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc nuid: nuid.New(), quitCh: quitCh, }, - sp: newIPQueue[uint64](s, qname+"sp"), } // TODO record domain name in as here @@ -1170,14 +1169,15 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc // This is a subscription that will process all JS API replies. We could split to // individual subscriptions if needed, but since there is a bit of common code, // that seemed like a good idea to be all in one place. 
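// A sketch of the accounting fix in the memstore.go hunk above: the MaxBytes
// check and the "can evicting the oldest message make room" check now use
// the same per-message size estimate (subject + headers + payload + fixed
// record overhead) on both sides of the comparison. The overhead constant
// here is an assumption for illustration.
func estMsgSize(subj string, hdr, msg []byte) uint64 {
	const recordOverhead = 16 // assumed fixed per-record bookkeeping
	return uint64(len(subj)+len(hdr)+len(msg)) + recordOverhead
}

func wouldExceed(curBytes, maxBytes uint64, subj string, hdr, msg []byte) bool {
	return maxBytes > 0 && curBytes+estMsgSize(subj, hdr, msg) >= maxBytes
}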
- if err := as.createSubscription(jsa.rplyr+"*.*", + if err := as.createSubscription(jsa.rplyr+">", as.processJSAPIReplies, &sid, &subs); err != nil { return nil, err } // We will listen for replies to session persist requests so that we can // detect the use of a session with the same client ID anywhere in the cluster. - if err := as.createSubscription(mqttJSARepliesPrefix+"*."+mqttJSASessPersist+".*", + // `$MQTT.JSA.{js-id}.SP.{client-id-hash}.{uuid}` + if err := as.createSubscription(mqttJSARepliesPrefix+"*."+mqttJSASessPersist+".*.*", as.processSessionPersist, &sid, &subs); err != nil { return nil, err } @@ -1203,12 +1203,6 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc as.sendJSAPIrequests(s, c, accName, closeCh) }) - // Start the go routine that will handle network updates regarding sessions - s.startGoRoutine(func() { - defer s.grWG.Done() - as.sessPersistProcessing(closeCh) - }) - lookupStream := func(stream, txt string) (*StreamInfo, error) { si, err := jsa.lookupStream(stream) if err != nil { @@ -1407,9 +1401,12 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc } if lastSeq > 0 { + ttl := time.NewTimer(mqttJSAPITimeout) + defer ttl.Stop() + select { case <-rmDoneCh: - case <-time.After(mqttJSAPITimeout): + case <-ttl.C: s.Warnf("Timing out waiting to load %v retained messages", st.Msgs) case <-quitCh: return nil, ErrServerNotRunning @@ -1454,7 +1451,7 @@ func (s *Server) mqttDetermineReplicas() int { ////////////////////////////////////////////////////////////////////////////// func (jsa *mqttJSA) newRequest(kind, subject string, hdr int, msg []byte) (interface{}, error) { - return jsa.newRequestEx(kind, subject, hdr, msg, mqttJSAPITimeout) + return jsa.newRequestEx(kind, subject, _EMPTY_, hdr, msg, mqttJSAPITimeout) } func (jsa *mqttJSA) prefixDomain(subject string) string { @@ -1467,19 +1464,24 @@ func (jsa *mqttJSA) prefixDomain(subject string) string { return subject } -func (jsa *mqttJSA) newRequestEx(kind, subject string, hdr int, msg []byte, timeout time.Duration) (interface{}, error) { +func (jsa *mqttJSA) newRequestEx(kind, subject, cidHash string, hdr int, msg []byte, timeout time.Duration) (interface{}, error) { + var sb strings.Builder jsa.mu.Lock() // Either we use nuid.Next() which uses a global lock, or our own nuid object, but // then it needs to be "write" protected. This approach will reduce across account // contention since we won't use the global nuid's lock. - var sb strings.Builder sb.WriteString(jsa.rplyr) sb.WriteString(kind) sb.WriteByte(btsep) + if cidHash != _EMPTY_ { + sb.WriteString(cidHash) + sb.WriteByte(btsep) + } sb.WriteString(jsa.nuid.Next()) - reply := sb.String() jsa.mu.Unlock() + reply := sb.String() + ch := make(chan interface{}, 1) jsa.replies.Store(reply, ch) @@ -1646,6 +1648,25 @@ func (jsa *mqttJSA) storeMsgWithKind(kind, subject string, headers int, msg []by return smr, smr.ToError() } +func (jsa *mqttJSA) storeSessionMsg(domainTk, cidHash string, hdr int, msg []byte) (*JSPubAckResponse, error) { + // Compute subject where the session is being stored + subject := mqttSessStreamSubjectPrefix + domainTk + cidHash + + // Passing cidHash will add it to the JS reply subject, so that we can use + // it in processSessionPersist. 
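// A sketch of the reply-subject layout used by newRequestEx above: for
// session persist requests an extra client-ID hash token is placed between
// the request kind and the unique suffix, e.g.
// "$MQTT.JSA.{js-id}.SP.{client-id-hash}.{uuid}", which lets
// processSessionPersist recover the client ID from the subject alone.
// Parameter names are illustrative.
func buildReply(replyPrefix, kind, cidHash, unique string) string {
	var sb strings.Builder
	sb.WriteString(replyPrefix) // e.g. "$MQTT.JSA.{js-id}."
	sb.WriteString(kind)        // e.g. "SP"
	sb.WriteByte('.')
	if cidHash != "" {
		sb.WriteString(cidHash)
		sb.WriteByte('.')
	}
	sb.WriteString(unique)
	return sb.String()
}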
+ smri, err := jsa.newRequestEx(mqttJSASessPersist, subject, cidHash, hdr, msg, mqttJSAPITimeout) + if err != nil { + return nil, err + } + smr := smri.(*JSPubAckResponse) + return smr, smr.ToError() +} + +func (jsa *mqttJSA) loadSessionMsg(domainTk, cidHash string) (*StoredMsg, error) { + subject := mqttSessStreamSubjectPrefix + domainTk + cidHash + return jsa.loadLastMsgFor(mqttSessStreamName, subject) +} + func (jsa *mqttJSA) deleteMsg(stream string, seq uint64, wait bool) error { dreq := JSApiMsgDeleteRequest{Seq: seq, NoErase: true} req, _ := json.Marshal(dreq) @@ -1817,6 +1838,7 @@ func (as *mqttAccountSessionManager) processSessionPersist(_ *subscription, pc * if tokenAt(subject, mqttJSAIdTokenPos) == as.jsa.id { return } + cIDHash := tokenAt(subject, mqttJSAClientIDPos) _, msg := pc.msgParts(rmsg) if len(msg) < LEN_CR_LF { return @@ -1839,18 +1861,6 @@ func (as *mqttAccountSessionManager) processSessionPersist(_ *subscription, pc * if ignore { return } - // We would need to lookup the message and that would be a request/reply, - // which we can't do in place here. So move that to a long-running routine - // that will process the session persist record. - as.sp.push(par.Sequence) -} - -func (as *mqttAccountSessionManager) processSessPersistRecord(seq uint64) { - smsg, err := as.jsa.loadMsg(mqttSessStreamName, seq) - if err != nil { - return - } - cIDHash := strings.TrimPrefix(smsg.Subject, mqttSessStreamSubjectPrefix+as.domainTk) as.mu.Lock() defer as.mu.Unlock() @@ -1861,7 +1871,7 @@ func (as *mqttAccountSessionManager) processSessPersistRecord(seq uint64) { // If our current session's stream sequence is higher, it means that this // update is stale, so we don't do anything here. sess.mu.Lock() - ignore := seq < sess.seq + ignore = par.Sequence < sess.seq sess.mu.Unlock() if ignore { return @@ -1881,28 +1891,6 @@ func (as *mqttAccountSessionManager) processSessPersistRecord(seq uint64) { sess.mu.Unlock() } -func (as *mqttAccountSessionManager) sessPersistProcessing(closeCh chan struct{}) { - as.mu.RLock() - sp := as.sp - quitCh := as.jsa.quitCh - as.mu.RUnlock() - - for { - select { - case <-sp.ch: - seqs := sp.pop() - for _, seq := range seqs { - as.processSessPersistRecord(seq) - } - sp.recycle(&seqs) - case <-closeCh: - return - case <-quitCh: - return - } - } -} - // Adds this client ID to the flappers map, and if needed start the timer // for map cleanup. // @@ -2176,6 +2164,30 @@ func (as *mqttAccountSessionManager) removeSession(sess *mqttSession, lock bool) } } +// Helpers that sets the sub's mqtt fields and possibly serialize +// (pre-loaded) retained messages. +// Session lock held on entry. +func (sess *mqttSession) processSub(c *client, subject, sid []byte, isReserved bool, qos byte, jsDurName string, h msgHandler, initShadow bool) (*subscription, error) { + sub, err := c.processSub(subject, nil, sid, h, false) + if err != nil { + // c.processSub already called c.Errorf(), so no need here. + return nil, err + } + subs := []*subscription{sub} + if initShadow { + subs = append(subs, sub.shadow...) + } + for _, ss := range subs { + if ss.mqtt == nil { + ss.mqtt = &mqttSub{} + } + ss.mqtt.qos = qos + ss.mqtt.reserved = isReserved + ss.mqtt.jsDur = jsDurName + } + return sub, nil +} + // Process subscriptions for the given session/client. 
// // When `fromSubProto` is false, it means that this is invoked from the CONNECT @@ -2193,14 +2205,85 @@ func (as *mqttAccountSessionManager) removeSession(sess *mqttSession, lock bool) func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client, filters []*mqttFilter, fromSubProto, trace bool) ([]*subscription, error) { - // Helpers to lock/unlock both account manager and session. - asAndSessLock := func() { - as.mu.Lock() - sess.mu.Lock() + // Helper to determine if we need to create a separate top-level + // subscription for a wildcard. + fwc := func(subject string) (bool, string, string) { + if !mqttNeedSubForLevelUp(subject) { + return false, _EMPTY_, _EMPTY_ + } + // Say subject is "foo.>", remove the ".>" so that it becomes "foo" + fwcsubject := subject[:len(subject)-2] + // Change the sid to "foo fwc" + fwcsid := fwcsubject + mqttMultiLevelSidSuffix + + return true, fwcsubject, fwcsid } - asAndSessUnlock := func() { - sess.mu.Unlock() - as.mu.Unlock() + + // Cache and a helper to load retained messages for a given subject. + rms := make(map[string]*mqttRetainedMsg) + loadRMS := func(subject []byte) error { + sub := &subscription{ + client: c, + subject: subject, + sid: subject, + } + c.mu.Lock() + acc := c.acc + c.mu.Unlock() + if err := c.addShadowSubscriptions(acc, sub, false); err != nil { + return err + } + // Best-effort loading the messages, logs on errors (to c.srv), loads + // once for subject. + as.loadRetainedMessagesForSubject(rms, subject, c.srv) + for _, ss := range sub.shadow { + as.loadRetainedMessagesForSubject(rms, ss.subject, c.srv) + } + return nil + } + + // Preload retained messages for all requested subscriptions. Also, since + // it's the first iteration over the filter list, do some cleanup. + for _, f := range filters { + if f.qos > 2 { + f.qos = 2 + } + if c.mqtt.downgradeQoS2Sub && f.qos == 2 { + c.Warnf("Downgrading subscription QoS2 to QoS1 for %q, as configured", f.filter) + f.qos = 1 + } + + // Do not allow subscribing to our internal subjects. + // + // TODO: (levb: not sure why since one can subscribe to `#` and it'll + // include everything; I guess this would discourage? Otherwise another + // candidate for DO NOT DELIVER prefix list). + if strings.HasPrefix(f.filter, mqttSubPrefix) { + f.qos = mqttSubAckFailure + continue + } + + if f.qos == 2 { + if err := sess.ensurePubRelConsumerSubscription(c); err != nil { + c.Errorf("failed to initialize PUBREL processing: %v", err) + f.qos = mqttSubAckFailure + continue + } + } + + // Load retained messages. + if fromSubProto { + if err := loadRMS([]byte(f.filter)); err != nil { + f.qos = mqttSubAckFailure + continue + } + if need, subject, _ := fwc(f.filter); need { + if err := loadRMS([]byte(subject)); err != nil { + f.qos = mqttSubAckFailure + continue + } + } + } } // Small helper to add the consumer config to the session. @@ -2214,90 +2297,80 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client, sess.cons[sid] = cc } - // Helper that sets the sub's mqtt fields and possibly serialize retained messages. - // Assumes account manager and session lock held. - setupSub := func(sub *subscription, qos byte) { - subs := []*subscription{sub} - if len(sub.shadow) > 0 { - subs = append(subs, sub.shadow...) 
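// A sketch of the "level up" handling behind the fwc helper above: an MQTT
// filter such as "foo/#" (mapped to "foo.>") also matches "foo" itself, so a
// second subscription on the parent subject is needed, with a distinct sid
// suffix. The suffix constant and the suffix-based check are assumptions
// standing in for mqttMultiLevelSidSuffix and mqttNeedSubForLevelUp.
func levelUpSubject(subject string) (need bool, fwcSubject, fwcSid string) {
	const multiLevelSidSuffix = " fwc" // assumed suffix value
	if !strings.HasSuffix(subject, ".>") {
		return false, "", ""
	}
	fwcSubject = strings.TrimSuffix(subject, ".>")
	return true, fwcSubject, fwcSubject + multiLevelSidSuffix
}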
- } - for _, sub := range subs { - if sub.mqtt == nil { - sub.mqtt = &mqttSub{} - } - sub.mqtt.qos = qos - sub.mqtt.reserved = isMQTTReservedSubscription(string(sub.subject)) - if fromSubProto { - as.serializeRetainedMsgsForSub(sess, c, sub, trace) - } + serializeRMS := func(sub *subscription) { + for _, ss := range append([]*subscription{sub}, sub.shadow...) { + as.serializeRetainedMsgsForSub(rms, sess, c, ss, trace) } } var err error subs := make([]*subscription, 0, len(filters)) for _, f := range filters { - if f.qos > 2 { - f.qos = 2 - } - if c.mqtt.downgradeQoS2Sub && f.qos == 2 { - c.Warnf("Downgrading subscription QoS2 to QoS1 for %q, as configured", f.filter) - f.qos = 1 - } - subject := f.filter - sid := subject - if strings.HasPrefix(subject, mqttSubPrefix) { - f.qos = mqttSubAckFailure + // Skip what's already been identified as a failure. + if f.qos == mqttSubAckFailure { continue } + subject := f.filter + bsubject := []byte(subject) + sid := subject + bsid := bsubject var jscons *ConsumerConfig var jssub *subscription - // Note that if a subscription already exists on this subject, - // the existing sub is returned. Need to update the qos. - asAndSessLock() - sub, err := c.processSub([]byte(subject), nil, []byte(sid), mqttDeliverMsgCbQoS0, false) - if err == nil { - setupSub(sub, f.qos) - } - if f.qos == 2 { - err = sess.ensurePubRelConsumerSubscription(c) - } - asAndSessUnlock() - - if err == nil { - // This will create (if not already exist) a JS consumer for subscriptions - // of QoS >= 1. But if a JS consumer already exists and the subscription - // for same subject is now a QoS==0, then the JS consumer will be deleted. - jscons, jssub, err = sess.processJSConsumer(c, subject, sid, f.qos, fromSubProto) + // Note that if a subscription already exists on this subject, the + // existing sub is returned. Need to update the qos. + as.mu.Lock() + sess.mu.Lock() + sub, err := sess.processSub(c, bsubject, bsid, + isMQTTReservedSubscription(subject), f.qos, _EMPTY_, mqttDeliverMsgCbQoS0, true) + if err == nil && fromSubProto { + serializeRMS(sub) } + sess.mu.Unlock() + as.mu.Unlock() if err != nil { - // c.processSub already called c.Errorf(), so no need here. f.qos = mqttSubAckFailure sess.cleanupFailedSub(c, sub, jscons, jssub) continue } - if mqttNeedSubForLevelUp(subject) { + // This will create (if not already exist) a JS consumer for + // subscriptions of QoS >= 1. But if a JS consumer already exists and + // the subscription for same subject is now a QoS==0, then the JS + // consumer will be deleted. + jscons, jssub, err = sess.processJSConsumer(c, subject, sid, f.qos, fromSubProto) + if err != nil { + f.qos = mqttSubAckFailure + sess.cleanupFailedSub(c, sub, jscons, jssub) + continue + } + + // Process the wildcard subject if needed. + if need, fwcsubject, fwcsid := fwc(subject); need { var fwjscons *ConsumerConfig var fwjssub *subscription var fwcsub *subscription - // Say subject is "foo.>", remove the ".>" so that it becomes "foo" - fwcsubject := subject[:len(subject)-2] - // Change the sid to "foo fwc" - fwcsid := fwcsubject + mqttMultiLevelSidSuffix // See note above about existing subscription. 
- asAndSessLock() - fwcsub, err = c.processSub([]byte(fwcsubject), nil, []byte(fwcsid), mqttDeliverMsgCbQoS0, false) - if err == nil { - setupSub(fwcsub, f.qos) + as.mu.Lock() + sess.mu.Lock() + fwcsub, err = sess.processSub(c, []byte(fwcsubject), []byte(fwcsid), + isMQTTReservedSubscription(subject), f.qos, _EMPTY_, mqttDeliverMsgCbQoS0, true) + if err == nil && fromSubProto { + serializeRMS(fwcsub) } - asAndSessUnlock() - if err == nil { - fwjscons, fwjssub, err = sess.processJSConsumer(c, fwcsubject, fwcsid, f.qos, fromSubProto) + sess.mu.Unlock() + as.mu.Unlock() + if err != nil { + // c.processSub already called c.Errorf(), so no need here. + f.qos = mqttSubAckFailure + sess.cleanupFailedSub(c, sub, jscons, jssub) + continue } + + fwjscons, fwjssub, err = sess.processJSConsumer(c, fwcsubject, fwcsid, f.qos, fromSubProto) if err != nil { // c.processSub already called c.Errorf(), so no need here. f.qos = mqttSubAckFailure @@ -2305,6 +2378,7 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client, sess.cleanupFailedSub(c, fwcsub, fwjscons, fwjssub) continue } + subs = append(subs, fwcsub) addJSConsToSess(fwcsid, fwjscons) } @@ -2328,15 +2402,19 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client, // Runs from the client's readLoop. // Account session manager lock held on entry. // Session lock held on entry. -func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(sess *mqttSession, c *client, sub *subscription, trace bool) { - if len(as.retmsgs) == 0 { +func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(rms map[string]*mqttRetainedMsg, sess *mqttSession, c *client, sub *subscription, trace bool) { + if len(as.retmsgs) == 0 || len(rms) == 0 { return } - var rmsa [64]*mqttRetainedMsg - rms := rmsa[:0] - - as.getRetainedPublishMsgs(string(sub.subject), &rms) - for _, rm := range rms { + result := as.sl.ReverseMatch(string(sub.subject)) + if len(result.psubs) == 0 { + return + } + for _, psub := range result.psubs { + rm, ok := rms[string(psub.subject)] + if !ok { + continue + } if sub.mqtt.prm == nil { sub.mqtt.prm = &mqttWriter{} } @@ -2379,23 +2457,36 @@ func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(sess *mqttSessi // Returns in the provided slice all publish retained message records that // match the given subscription's `subject` (which could have wildcards). // -// Account session manager lock held on entry. -func (as *mqttAccountSessionManager) getRetainedPublishMsgs(subject string, rms *[]*mqttRetainedMsg) { - result := as.sl.ReverseMatch(subject) +// Account session manager NOT lock held on entry. 
+func (as *mqttAccountSessionManager) loadRetainedMessagesForSubject(rms map[string]*mqttRetainedMsg, topSubject []byte, log Logger) { + as.mu.RLock() + if len(as.retmsgs) == 0 { + as.mu.RUnlock() + return + } + result := as.sl.ReverseMatch(string(topSubject)) + as.mu.RUnlock() + if len(result.psubs) == 0 { return } for _, sub := range result.psubs { - subj := mqttRetainedMsgsStreamSubject + string(sub.subject) - jsm, err := as.jsa.loadLastMsgFor(mqttRetainedMsgsStreamName, subj) + subject := string(sub.subject) + if rms[subject] != nil { + continue // already loaded + } + loadSubject := mqttRetainedMsgsStreamSubject + subject + jsm, err := as.jsa.loadLastMsgFor(mqttRetainedMsgsStreamName, loadSubject) if err != nil || jsm == nil { + log.Warnf("failed to load retained message for subject %q: %v", loadSubject, err) continue } var rm mqttRetainedMsg if err := json.Unmarshal(jsm.Data, &rm); err != nil { + log.Warnf("failed to decode retained message for subject %q: %v", loadSubject, err) continue } - *rms = append(*rms, &rm) + rms[subject] = &rm } } @@ -2417,8 +2508,7 @@ func (as *mqttAccountSessionManager) createOrRestoreSession(clientID string, opt } hash := getHash(clientID) - subject := mqttSessStreamSubjectPrefix + as.domainTk + hash - smsg, err := jsa.loadLastMsgFor(mqttSessStreamName, subject) + smsg, err := jsa.loadSessionMsg(as.domainTk, hash) if err != nil { if isErrorOtherThan(err, JSNoMessageFoundErr) { return formatError("loading session record", err) @@ -2434,6 +2524,7 @@ func (as *mqttAccountSessionManager) createOrRestoreSession(clientID string, opt if err := json.Unmarshal(smsg.Data, ps); err != nil { return formatError(fmt.Sprintf("unmarshal of session record at sequence %v", smsg.Sequence), err) } + // Restore this session (even if we don't own it), the caller will do the right thing. sess := mqttSessionCreate(jsa, clientID, hash, smsg.Sequence, opts) sess.domainTk = as.domainTk @@ -2479,7 +2570,7 @@ func (as *mqttAccountSessionManager) transferUniqueSessStreamsToMuxed(log *Serve }() jsa := &as.jsa - sni, err := jsa.newRequestEx(mqttJSAStreamNames, JSApiStreams, 0, nil, 5*time.Second) + sni, err := jsa.newRequestEx(mqttJSAStreamNames, JSApiStreams, _EMPTY_, 0, nil, 5*time.Second) if err != nil { log.Errorf("Unable to transfer MQTT session streams: %v", err) return @@ -2514,10 +2605,8 @@ func (as *mqttAccountSessionManager) transferUniqueSessStreamsToMuxed(log *Serve log.Warnf(" Unable to unmarshal the content of this stream, may not be a legitimate MQTT session stream, skipping") continue } - // Compute subject where the session is being stored - subject := mqttSessStreamSubjectPrefix + as.domainTk + getHash(ps.ID) // Store record to MQTT session stream - if _, err := jsa.storeMsgWithKind(mqttJSASessPersist, subject, 0, smsg.Data); err != nil { + if _, err := jsa.storeSessionMsg(as.domainTk, getHash(ps.ID), 0, smsg.Data); err != nil { log.Errorf(" Unable to transfer the session record: %v", err) return } @@ -2553,7 +2642,8 @@ func (as *mqttAccountSessionManager) transferRetainedToPerKeySubjectStream(log * } // Store the message again, this time with the new per-key subject. 
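// A sketch of the load-once behaviour of loadRetainedMessagesForSubject
// above: within a single SUBSCRIBE packet, each retained-message subject is
// fetched from JetStream at most once and then reused from the rms map for
// every matching subscription. The loader signature is an assumption.
func loadOnce(rms map[string]*mqttRetainedMsg, subject string, load func(string) (*mqttRetainedMsg, error)) *mqttRetainedMsg {
	if rm, ok := rms[subject]; ok {
		return rm // already loaded for this packet
	}
	rm, err := load(subject)
	if err != nil || rm == nil {
		return nil // best effort; caller logs on error
	}
	rms[subject] = rm
	return rm
}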
subject := mqttRetainedMsgsStreamSubject + rmsg.Subject - if _, err := jsa.storeMsgWithKind(mqttJSASessPersist, subject, 0, smsg.Data); err != nil { + + if _, err := jsa.storeMsg(subject, 0, smsg.Data); err != nil { log.Errorf(" Unable to transfer the retained message with sequence %d: %v", smsg.Sequence, err) errors++ continue @@ -2619,7 +2709,7 @@ func (sess *mqttSession) save() error { } b, _ := json.Marshal(&ps) - subject := mqttSessStreamSubjectPrefix + sess.domainTk + sess.idHash + domainTk, cidHash := sess.domainTk, sess.idHash seq := sess.seq sess.mu.Unlock() @@ -2637,7 +2727,7 @@ func (sess *mqttSession) save() error { b = bb.Bytes() } - resp, err := sess.jsa.storeMsgWithKind(mqttJSASessPersist, subject, hdr, b) + resp, err := sess.jsa.storeSessionMsg(domainTk, cidHash, hdr, b) if err != nil { return fmt.Errorf("unable to persist session %q (seq=%v): %v", ps.ID, seq, err) } @@ -2691,8 +2781,13 @@ func (sess *mqttSession) clear() error { } if seq > 0 { - if err := sess.jsa.deleteMsg(mqttSessStreamName, seq, true); err != nil { - return fmt.Errorf("unable to delete session %q record at sequence %v", id, seq) + err := sess.jsa.deleteMsg(mqttSessStreamName, seq, true) + // Ignore the various errors indicating that the message (or sequence) + // is already deleted, can happen in a cluster. + if isErrorOtherThan(err, JSSequenceNotFoundErrF) { + if isErrorOtherThan(err, JSStreamMsgDeleteFailedF) || !strings.Contains(err.Error(), ErrStoreMsgNotFound.Error()) { + return fmt.Errorf("unable to delete session %q record at sequence %v: %v", id, seq, err) + } } } return nil @@ -3149,6 +3244,9 @@ func (c *client) mqttConnectTrace(cp *mqttConnectProto) string { trace += fmt.Sprintf(" will=(topic=%s QoS=%v retain=%v)", cp.will.topic, cp.will.qos, cp.will.retain) } + if cp.flags&mqttConnFlagCleanSession != 0 { + trace += " clean" + } if c.opts.Username != _EMPTY_ { trace += fmt.Sprintf(" username=%s", c.opts.Username) } @@ -4349,11 +4447,11 @@ func mqttIsReservedSub(sub *subscription, subject string) bool { // Check if a sub is a reserved wildcard. E.g. '#', '*', or '*/" prefix. func isMQTTReservedSubscription(subject string) bool { - if len(subject) == 1 && subject[0] == fwc || subject[0] == pwc { + if len(subject) == 1 && (subject[0] == fwc || subject[0] == pwc) { return true } // Match "*.<>" - if len(subject) > 1 && subject[0] == pwc && subject[1] == btsep { + if len(subject) > 1 && (subject[0] == pwc && subject[1] == btsep) { return true } return false @@ -4467,9 +4565,6 @@ func (sess *mqttSession) cleanupFailedSub(c *client, sub *subscription, cc *Cons // Make sure we are set up to deliver PUBREL messages to this QoS2-subscribed // session. -// -// Session lock held on entry. Need to make sure no other subscribe packet races -// to do the same. func (sess *mqttSession) ensurePubRelConsumerSubscription(c *client) error { opts := c.srv.getOpts() ackWait := opts.MQTT.AckWait @@ -4481,21 +4576,32 @@ func (sess *mqttSession) ensurePubRelConsumerSubscription(c *client) error { maxAckPending = mqttDefaultMaxAckPending } + sess.mu.Lock() + pubRelSubscribed := sess.pubRelSubscribed + pubRelSubject := sess.pubRelSubject + pubRelDeliverySubjectB := sess.pubRelDeliverySubjectB + pubRelDeliverySubject := sess.pubRelDeliverySubject + pubRelConsumer := sess.pubRelConsumer + tmaxack := sess.tmaxack + idHash := sess.idHash + id := sess.id + sess.mu.Unlock() + // Subscribe before the consumer is created so we don't loose any messages. 
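// The added parentheses in isMQTTReservedSubscription above change the
// meaning: in Go, && binds tighter than ||, so without them the length guard
// only applied to the first comparison. A minimal illustration with
// placeholder wildcard characters:
func firstByteIsWildcard(s string) bool {
	// Buggy form: parsed as (len(s) == 1 && s[0] == '#') || s[0] == '+',
	// which tests s[0] == '+' even when len(s) != 1.
	// return len(s) == 1 && s[0] == '#' || s[0] == '+'

	// Intended form: the length guard covers both comparisons.
	return len(s) == 1 && (s[0] == '#' || s[0] == '+')
}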
- if !sess.pubRelSubscribed { - _, err := c.processSub(sess.pubRelDeliverySubjectB, nil, sess.pubRelDeliverySubjectB, + if !pubRelSubscribed { + _, err := c.processSub(pubRelDeliverySubjectB, nil, pubRelDeliverySubjectB, mqttDeliverPubRelCb, false) if err != nil { - c.Errorf("Unable to create subscription for JetStream consumer on %q: %v", sess.pubRelDeliverySubject, err) + c.Errorf("Unable to create subscription for JetStream consumer on %q: %v", pubRelDeliverySubject, err) return err } - sess.pubRelSubscribed = true + pubRelSubscribed = true } // Create the consumer if needed. - if sess.pubRelConsumer == nil { + if pubRelConsumer == nil { // Check that the limit of subs' maxAckPending are not going over the limit - if after := sess.tmaxack + maxAckPending; after > mqttMaxAckTotalLimit { + if after := tmaxack + maxAckPending; after > mqttMaxAckTotalLimit { return fmt.Errorf("max_ack_pending for all consumers would be %v which exceeds the limit of %v", after, mqttMaxAckTotalLimit) } @@ -4503,11 +4609,11 @@ func (sess *mqttSession) ensurePubRelConsumerSubscription(c *client) error { ccr := &CreateConsumerRequest{ Stream: mqttOutStreamName, Config: ConsumerConfig{ - DeliverSubject: sess.pubRelDeliverySubject, - Durable: mqttPubRelConsumerDurablePrefix + sess.idHash, + DeliverSubject: pubRelDeliverySubject, + Durable: mqttPubRelConsumerDurablePrefix + idHash, AckPolicy: AckExplicit, DeliverPolicy: DeliverNew, - FilterSubject: sess.pubRelSubject, + FilterSubject: pubRelSubject, AckWait: ackWait, MaxAckPending: maxAckPending, MemoryStorage: opts.MQTT.ConsumerMemoryStorage, @@ -4517,28 +4623,41 @@ func (sess *mqttSession) ensurePubRelConsumerSubscription(c *client) error { ccr.Config.InactiveThreshold = opts.MQTT.ConsumerInactiveThreshold } if _, err := sess.jsa.createConsumer(ccr); err != nil { - c.Errorf("Unable to add JetStream consumer for PUBREL for client %q: err=%v", sess.id, err) + c.Errorf("Unable to add JetStream consumer for PUBREL for client %q: err=%v", id, err) return err } - sess.pubRelConsumer = &ccr.Config - sess.tmaxack += maxAckPending + pubRelConsumer = &ccr.Config + tmaxack += maxAckPending } + sess.mu.Lock() + sess.pubRelSubscribed = pubRelSubscribed + sess.pubRelConsumer = pubRelConsumer + sess.tmaxack = tmaxack + sess.mu.Unlock() + return nil } // When invoked with a QoS of 0, looks for an existing JS durable consumer for // the given sid and if one is found, delete the JS durable consumer and unsub // the NATS subscription on the delivery subject. +// // With a QoS > 0, creates or update the existing JS durable consumer along with // its NATS subscription on a delivery subject. // -// Lock not held on entry, but session is in the locked map. +// Session lock is acquired and released as needed. Session is in the locked +// map. func (sess *mqttSession) processJSConsumer(c *client, subject, sid string, qos byte, fromSubProto bool) (*ConsumerConfig, *subscription, error) { - // Check if we are already a JS consumer for this SID. + sess.mu.Lock() cc, exists := sess.cons[sid] + tmaxack := sess.tmaxack + idHash := sess.idHash + sess.mu.Unlock() + + // Check if we are already a JS consumer for this SID. if exists { // If current QoS is 0, it means that we need to delete the existing // one (that was QoS > 0) @@ -4547,7 +4666,11 @@ func (sess *mqttSession) processJSConsumer(c *client, subject, sid string, // the form: mqttSubPrefix + . It is also used as the sid // for the NATS subscription, so use that for the lookup. 
sub := c.subs[cc.DeliverSubject] + + sess.mu.Lock() delete(sess.cons, sid) + sess.mu.Unlock() + sess.deleteConsumer(cc) if sub != nil { c.processUnsub(sub.sid) @@ -4583,12 +4706,12 @@ func (sess *mqttSession) processJSConsumer(c *client, subject, sid string, } // Check that the limit of subs' maxAckPending are not going over the limit - if after := sess.tmaxack + maxAckPending; after > mqttMaxAckTotalLimit { + if after := tmaxack + maxAckPending; after > mqttMaxAckTotalLimit { return nil, nil, fmt.Errorf("max_ack_pending for all consumers would be %v which exceeds the limit of %v", after, mqttMaxAckTotalLimit) } - durName := sess.idHash + "_" + nuid.Next() + durName := idHash + "_" + nuid.Next() ccr := &CreateConsumerRequest{ Stream: mqttStreamName, Config: ConsumerConfig{ @@ -4610,25 +4733,22 @@ func (sess *mqttSession) processJSConsumer(c *client, subject, sid string, return nil, nil, err } cc = &ccr.Config - sess.tmaxack += maxAckPending + tmaxack += maxAckPending } + // This is an internal subscription on subject like "$MQTT.sub." that is setup // for the JS durable's deliver subject. sess.mu.Lock() - sub, err := c.processSub([]byte(inbox), nil, []byte(inbox), mqttDeliverMsgCbQoS12, false) + sess.tmaxack = tmaxack + sub, err := sess.processSub(c, []byte(inbox), []byte(inbox), + isMQTTReservedSubscription(subject), qos, cc.Durable, mqttDeliverMsgCbQoS12, false) + sess.mu.Unlock() + if err != nil { - sess.mu.Unlock() sess.deleteConsumer(cc) c.Errorf("Unable to create subscription for JetStream consumer on %q: %v", subject, err) return nil, nil, err } - if sub.mqtt == nil { - sub.mqtt = &mqttSub{} - } - sub.mqtt.qos = qos - sub.mqtt.jsDur = cc.Durable - sub.mqtt.reserved = isMQTTReservedSubscription(subject) - sess.mu.Unlock() return cc, sub, nil } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/raft.go b/vendor/github.com/nats-io/nats-server/v2/server/raft.go index b0f30786be..87bd00f94e 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/raft.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/raft.go @@ -103,7 +103,6 @@ const ( Follower RaftState = iota Leader Candidate - Observer Closed ) @@ -115,8 +114,6 @@ func (state RaftState) String() string { return "CANDIDATE" case Leader: return "LEADER" - case Observer: - return "OBSERVER" case Closed: return "CLOSED" } @@ -125,108 +122,105 @@ func (state RaftState) String() string { type raft struct { sync.RWMutex - created time.Time - group string - sd string - id string - wal WAL - wtype StorageType - track bool - werr error + + created time.Time // Time that the group was created + accName string // Account name of the asset this raft group is for + group string // Raft group + sd string // Store directory + id string // Node ID + + wal WAL // WAL store (filestore or memstore) + wtype StorageType // WAL type, e.g. 
FileStorage or MemoryStorage + track bool // + werr error // Last write error + state atomic.Int32 // RaftState - hh hash.Hash64 - snapfile string - csz int - qn int - peers map[string]*lps - removed map[string]struct{} - acks map[uint64]map[string]struct{} - pae map[uint64]*appendEntry - elect *time.Timer - active time.Time - llqrt time.Time - lsut time.Time - term uint64 // The current vote term - pterm uint64 // Previous term from the last snapshot - pindex uint64 // Previous index from the last snapshot - commit uint64 // Sequence number of the most recent commit - applied uint64 // Sequence number of the most recently applied commit - leader string // The ID of the leader - vote string - hash string - s *Server - c *client - js *jetStream - dflag bool - pleader bool - observer bool - extSt extensionState + hh hash.Hash64 // Highwayhash, used for snapshots + snapfile string // Snapshot filename - // Subjects for votes, updates, replays. - psubj string - rpsubj string - vsubj string - vreply string - asubj string - areply string + csz int // Cluster size + qn int // Number of nodes needed to establish quorum + peers map[string]*lps // Other peers in the Raft group - sq *sendq - aesub *subscription + removed map[string]struct{} // Peers that were removed from the group + acks map[uint64]map[string]struct{} // Append entry responses/acks, map of entry index -> peer ID + pae map[uint64]*appendEntry // Pending append entries - // Are we doing a leadership transfer. - lxfer bool + elect *time.Timer // Election timer, normally accessed via electTimer + active time.Time // Last activity time, i.e. for heartbeats + llqrt time.Time // Last quorum lost time + lsut time.Time // Last scale-up time - // For holding term and vote and peerstate to be written. - wtv []byte - wps []byte - wtvch chan struct{} - wpsch chan struct{} + term uint64 // The current vote term + pterm uint64 // Previous term from the last snapshot + pindex uint64 // Previous index from the last snapshot + commit uint64 // Sequence number of the most recent commit + applied uint64 // Sequence number of the most recently applied commit - // For when we need to catch up as a follower. - catchup *catchupState + leader string // The ID of the leader + vote string // Our current vote state + lxfer bool // Are we doing a leadership transfer? - // For leader or server catching up a follower. - progress map[string]*ipQueue[uint64] + s *Server // Reference to top-level server + c *client // Internal client for subscriptions + js *jetStream // JetStream, if running, to see if we are out of resources - // For when we have paused our applyC. - paused bool - hcommit uint64 - pobserver bool + dflag bool // Debug flag + pleader bool // Has the group ever had a leader? + observer bool // The node is observing, i.e. 
not participating in voting + extSt extensionState // Extension state - // Queues and Channels - prop *ipQueue[*Entry] - entry *ipQueue[*appendEntry] - resp *ipQueue[*appendEntryResponse] - apply *ipQueue[*CommittedEntry] - reqs *ipQueue[*voteRequest] - votes *ipQueue[*voteResponse] - stepdown *ipQueue[string] - leadc chan bool - quit chan struct{} + psubj string // Proposals subject + rpsubj string // Remove peers subject + vsubj string // Vote requests subject + vreply string // Vote responses subject + asubj string // Append entries subject + areply string // Append entries responses subject - // Account name of the asset this raft group is for - accName string + sq *sendq // Send queue for outbound RPC messages + aesub *subscription // Subscription for handleAppendEntry callbacks - // Random generator, used to generate inboxes for instance - prand *rand.Rand + wtv []byte // Term and vote to be written + wps []byte // Peer state to be written + wtvch chan struct{} // Signals when a term vote was just written, to kick file writer + wpsch chan struct{} // Signals when a peer state was just written, to kick file writer + + catchup *catchupState // For when we need to catch up as a follower. + progress map[string]*ipQueue[uint64] // For leader or server catching up a follower. + + paused bool // Whether or not applies are paused + hcommit uint64 // The commit at the time that applies were paused + pobserver bool // Whether we were an observer at the time that applies were paused + + prop *ipQueue[*Entry] // Proposals + entry *ipQueue[*appendEntry] // Append entries + resp *ipQueue[*appendEntryResponse] // Append entries responses + apply *ipQueue[*CommittedEntry] // Apply queue (committed entries to be passed to upper layer) + reqs *ipQueue[*voteRequest] // Vote requests + votes *ipQueue[*voteResponse] // Vote responses + stepdown *ipQueue[string] // Stepdown requests + leadc chan bool // Leader changes + quit chan struct{} // Raft group shutdown + + prand *rand.Rand // Random generator, used to generate inboxes for instance } // cacthupState structure that holds our subscription, and catchup term and index // as well as starting term and index and how many updates we have seen. type catchupState struct { - sub *subscription - cterm uint64 - cindex uint64 - pterm uint64 - pindex uint64 - active time.Time + sub *subscription // Subscription that catchup messages will arrive on + cterm uint64 // Catchup term + cindex uint64 // Catchup index + pterm uint64 // Starting term + pindex uint64 // Starting index + active time.Time // Last time we received a message for this catchup } // lps holds peer state of last time and last index replicated. type lps struct { - ts int64 - li uint64 - kp bool // marks as known peer. + ts int64 // Last timestamp + li uint64 // Last index replicated + kp bool // Known peer } const ( @@ -237,7 +231,6 @@ const ( hbIntervalDefault = 1 * time.Second lostQuorumIntervalDefault = hbIntervalDefault * 10 // 10 seconds lostQuorumCheckIntervalDefault = hbIntervalDefault * 10 // 10 seconds - ) var ( @@ -383,7 +376,6 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe track: cfg.Track, csz: ps.clusterSize, qn: ps.clusterSize/2 + 1, - hash: hash, peers: make(map[string]*lps), acks: make(map[uint64]map[string]struct{}), pae: make(map[uint64]*appendEntry), @@ -413,14 +405,19 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe n.dflag = true } + // Set up the highwayhash for the snapshots. 
key := sha256.Sum256([]byte(n.group)) n.hh, _ = highwayhash.New64(key[:]) + // If we have a term and vote file (tav.idx on the filesystem) then read in + // what we think the term and vote was. It's possible these are out of date + // so a catch-up may be required. if term, vote, err := n.readTermVote(); err == nil && term > 0 { n.term = term n.vote = vote } + // Make sure that the snapshots directory exists. if err := os.MkdirAll(filepath.Join(n.sd, snapshotsDir), 0750); err != nil { return nil, fmt.Errorf("could not create snapshots directory - %v", err) } @@ -433,6 +430,9 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe n.setupLastSnapshot() } + // Retrieve the stream state from the WAL. If there are pending append + // entries that were committed but not applied before we last shut down, + // we will try to replay them and process them here. var state StreamState n.wal.FastState(&state) if state.Msgs > 0 { @@ -444,6 +444,8 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe } } + // It looks like there are entries we have committed but not applied + // yet. Replay them. for index := state.FirstSeq; index <= state.LastSeq; index++ { ae, err := n.loadEntry(index) if err != nil { @@ -469,15 +471,18 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe // Make sure to track ourselves. n.peers[n.id] = &lps{time.Now().UnixNano(), 0, true} + // Track known peers for _, peer := range ps.knownPeers { - // Set these to 0 to start but mark as known peer. if peer != n.id { + // Set these to 0 to start but mark as known peer. n.peers[peer] = &lps{0, 0, true} } } - // Setup our internal subscriptions. + // Setup our internal subscriptions for proposals, votes and append entries. + // If we fail to do this for some reason then this is fatal — we cannot + // continue setting up or the Raft node may be partially/totally isolated. if err := n.createInternalSubs(); err != nil { n.shutdown(true) return nil, err @@ -486,18 +491,26 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe n.debug("Started") // Check if we need to start in observer mode due to lame duck status. + // This will stop us from taking on the leader role when we're about to + // shutdown anyway. if s.isLameDuckMode() { n.debug("Will start in observer mode due to lame duck status") n.SetObserver(true) } + // Set the election timer and lost quorum timers to now, so that we + // won't accidentally trigger either state without knowing the real state + // of the other nodes. n.Lock() n.resetElectionTimeout() n.llqrt = time.Now() n.Unlock() + // Register the Raft group. labels["group"] = n.group s.registerRaftNode(n.group, n) + + // Start the goroutines for the Raft state machine and the file writer. s.startGoRoutine(n.run, labels) s.startGoRoutine(n.fileWriter) @@ -529,7 +542,8 @@ func (s *Server) clusterNameForNode(node string) string { return _EMPTY_ } -// Server will track all raft nodes. +// Registers the Raft node with the server, as it will track all of the Raft +// nodes. func (s *Server) registerRaftNode(group string, n RaftNode) { s.rnMu.Lock() defer s.rnMu.Unlock() @@ -539,6 +553,7 @@ func (s *Server) registerRaftNode(group string, n RaftNode) { s.raftNodes[group] = n } +// Unregisters the Raft node from the server, i.e. at shutdown. 
func (s *Server) unregisterRaftNode(group string) { s.rnMu.Lock() defer s.rnMu.Unlock() @@ -547,12 +562,15 @@ func (s *Server) unregisterRaftNode(group string) { } } +// Returns how many Raft nodes are running in this server instance. func (s *Server) numRaftNodes() int { s.rnMu.Lock() defer s.rnMu.Unlock() return len(s.raftNodes) } +// Finds the Raft node for a given Raft group, if any. If there is no Raft node +// running for this group then it can return nil. func (s *Server) lookupRaftNode(group string) RaftNode { s.rnMu.RLock() defer s.rnMu.RUnlock() @@ -563,6 +581,8 @@ func (s *Server) lookupRaftNode(group string) RaftNode { return n } +// Reloads the debug state for all running Raft nodes. This is necessary when +// the configuration has been reloaded and the debug log level has changed. func (s *Server) reloadDebugRaftNodes(debug bool) { if s == nil { return @@ -577,15 +597,19 @@ func (s *Server) reloadDebugRaftNodes(debug bool) { s.rnMu.RUnlock() } +// Requests that all Raft nodes on this server step down and place them into +// observer mode. This is called when the server is shutting down. func (s *Server) stepdownRaftNodes() { if s == nil { return } - var nodes []RaftNode s.rnMu.RLock() - if len(s.raftNodes) > 0 { - s.Debugf("Stepping down all leader raft nodes") + if len(s.raftNodes) == 0 { + s.rnMu.RUnlock() + return } + s.Debugf("Stepping down all leader raft nodes") + nodes := make([]RaftNode, 0, len(s.raftNodes)) for _, n := range s.raftNodes { nodes = append(nodes, n) } @@ -599,15 +623,20 @@ func (s *Server) stepdownRaftNodes() { } } +// Shuts down all Raft nodes on this server. This is called either when the +// server is either entering lame duck mode, shutting down or when JetStream +// has been disabled. func (s *Server) shutdownRaftNodes() { if s == nil { return } - var nodes []RaftNode s.rnMu.RLock() - if len(s.raftNodes) > 0 { - s.Debugf("Shutting down all raft nodes") + if len(s.raftNodes) == 0 { + s.rnMu.RUnlock() + return } + nodes := make([]RaftNode, 0, len(s.raftNodes)) + s.Debugf("Shutting down all raft nodes") for _, n := range s.raftNodes { nodes = append(nodes, n) } @@ -625,11 +654,12 @@ func (s *Server) transferRaftLeaders() bool { if s == nil { return false } - var nodes []RaftNode s.rnMu.RLock() - if len(s.raftNodes) > 0 { - s.Debugf("Transferring any raft leaders") + if len(s.raftNodes) == 0 { + s.rnMu.RUnlock() + return false } + nodes := make([]RaftNode, 0, len(s.raftNodes)) for _, n := range s.raftNodes { nodes = append(nodes, n) } @@ -668,7 +698,8 @@ func (n *raft) Propose(data []byte) error { return nil } -// ProposeDirect will propose entries directly. +// ProposeDirect will propose entries directly by skipping the Raft state +// machine and sending them straight to the wire instead. // This should only be called on the leader. func (n *raft) ProposeDirect(entries []*Entry) error { if state := n.State(); state != Leader { @@ -746,13 +777,16 @@ func (n *raft) ProposeRemovePeer(peer string) error { return werr } + // If we are the leader then we are responsible for processing the + // peer remove and then notifying the rest of the group that the + // peer was removed. if isLeader { prop.push(newEntry(EntryRemovePeer, []byte(peer))) n.doRemovePeerAsLeader(peer) return nil } - // Need to forward. + // Otherwise we need to forward the proposal to the leader. n.sendRPC(subj, _EMPTY_, []byte(peer)) return nil } @@ -779,7 +813,8 @@ func (n *raft) AdjustBootClusterSize(csz int) error { if csz < 2 { csz = 2 } - // Adjust. 
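// A sketch of the quorum arithmetic used by AdjustBootClusterSize above and
// AdjustClusterSize just below: the number of nodes needed for quorum is a
// simple majority of the cluster size.
func quorumNeeded(clusterSize int) int {
	return clusterSize/2 + 1
}

// quorumNeeded(3) == 2, quorumNeeded(5) == 3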
+ // Adjust the cluster size and the number of nodes needed to establish + // a quorum. n.csz = csz n.qn = n.csz/2 + 1 @@ -798,7 +833,8 @@ func (n *raft) AdjustClusterSize(csz int) error { csz = 2 } - // Adjust. + // Adjust the cluster size and the number of nodes needed to establish + // a quorum. n.csz = csz n.qn = n.csz/2 + 1 n.Unlock() @@ -808,7 +844,8 @@ func (n *raft) AdjustClusterSize(csz int) error { } // PauseApply will allow us to pause processing of append entries onto our -// external apply chan. +// external apply queue. In effect this means that the upper layer will no longer +// receive any new entries from the Raft group. func (n *raft) PauseApply() error { if n.State() == Leader { return errAlreadyLeader @@ -832,6 +869,8 @@ func (n *raft) PauseApply() error { return nil } +// ResumeApply will resume sending applies to the external apply queue. This +// means that we will start sending new entries to the upper layer. func (n *raft) ResumeApply() { n.Lock() defer n.Unlock() @@ -862,8 +901,9 @@ func (n *raft) ResumeApply() { } } -// Applied is to be called when the FSM has applied the committed entries. -// Applied will return the number of entries and an estimation of the +// Applied is a callback that must be be called by the upper layer when it +// has successfully applied the committed entries that it received from the +// apply queue. It will return the number of entries and an estimation of the // byte size that could be removed with a snapshot/compact. func (n *raft) Applied(index uint64) (entries uint64, bytes uint64) { n.Lock() @@ -878,6 +918,9 @@ func (n *raft) Applied(index uint64) (entries uint64, bytes uint64) { if index > n.applied { n.applied = index } + + // Calculate the number of entries and estimate the byte size that + // we can now remove with a compaction/snapshot. var state StreamState n.wal.FastState(&state) if n.applied > state.FirstSeq { @@ -945,11 +988,14 @@ func (n *raft) InstallSnapshot(data []byte) error { n.Lock() + // If a write error has occurred already then stop here. if werr := n.werr; werr != nil { n.Unlock() return werr } + // Check that a catchup isn't already taking place. If it is then we won't + // allow installing snapshots until it is done. if len(n.progress) > 0 { n.Unlock() return errCatchupsRunning @@ -967,10 +1013,13 @@ func (n *raft) InstallSnapshot(data []byte) error { var term uint64 if ae, _ := n.loadEntry(n.applied); ae != nil { + // Use the term from the most recently applied entry if possible. term = ae.term } else if ae, _ = n.loadFirstEntry(); ae != nil { + // Otherwise see if we can find the term from the first entry. term = ae.term } else { + // Last resort is to use the last pterm that we knew of. term = n.pterm } @@ -1013,6 +1062,9 @@ func (n *raft) InstallSnapshot(data []byte) error { return nil } +// NeedSnapshot returns true if it is necessary to try to install a snapshot, i.e. +// after we have finished recovering/replaying at startup, on a regular interval or +// as a part of cleaning up when shutting down. func (n *raft) NeedSnapshot() bool { n.RLock() defer n.RUnlock() @@ -1024,6 +1076,8 @@ const ( snapFileT = "snap.%d.%d" ) +// termAndIndexFromSnapfile tries to load the snapshot file and returns the term +// and index from that snapshot. 
func termAndIndexFromSnapFile(sn string) (term, index uint64, err error) { if sn == _EMPTY_ { return 0, 0, errBadSnapName @@ -1035,6 +1089,9 @@ func termAndIndexFromSnapFile(sn string) (term, index uint64, err error) { return term, index, nil } +// setupLastSnapshot is called at startup to try and recover the last snapshot from +// the disk if possible. We will try to recover the term, index and commit/applied +// indices and then notify the upper layer what we found. Compacts the WAL if needed. func (n *raft) setupLastSnapshot() { snapDir := filepath.Join(n.sd, snapshotsDir) psnaps, err := os.ReadDir(snapDir) @@ -1084,19 +1141,25 @@ func (n *raft) setupLastSnapshot() { n.snapfile = latest snap, err := n.loadLastSnapshot() if err != nil { + // We failed to recover the last snapshot for some reason, so we will + // assume it has been corrupted and will try to delete it. if n.snapfile != _EMPTY_ { os.Remove(n.snapfile) n.snapfile = _EMPTY_ } - } else { - n.pindex = snap.lastIndex - n.pterm = snap.lastTerm - n.commit = snap.lastIndex - n.applied = snap.lastIndex - n.apply.push(newCommittedEntry(n.commit, []*Entry{{EntrySnapshot, snap.data}})) - if _, err := n.wal.Compact(snap.lastIndex + 1); err != nil { - n.setWriteErrLocked(err) - } + return + } + + // We successfully recovered the last snapshot from the disk. + // Recover state from the snapshot and then notify the upper layer. + // Compact the WAL when we're done if needed. + n.pindex = snap.lastIndex + n.pterm = snap.lastTerm + n.commit = snap.lastIndex + n.applied = snap.lastIndex + n.apply.push(newCommittedEntry(n.commit, []*Entry{{EntrySnapshot, snap.data}})) + if _, err := n.wal.Compact(snap.lastIndex + 1); err != nil { + n.setWriteErrLocked(err) } } @@ -1162,14 +1225,18 @@ func (n *raft) Leader() bool { return n.State() == Leader } +// isCatchingUp returns true if a catchup is currently taking place. func (n *raft) isCatchingUp() bool { n.RLock() defer n.RUnlock() return n.catchup != nil } -// This function may block for up to ~10ms to check -// forward progress in some cases. +// isCurrent is called from the healthchecks and returns true if we believe +// that the upper layer is current with the Raft layer, i.e. that it has applied +// all of the commits that we have given it. +// Optionally we can also check whether or not we're making forward progress if we +// aren't current, in which case this function may block for up to ~10ms to find out. // Lock should be held. func (n *raft) isCurrent(includeForwardProgress bool) bool { // Check if we are closed. @@ -1220,7 +1287,7 @@ func (n *raft) isCurrent(includeForwardProgress bool) bool { // Otherwise, wait for a short period of time and see if we are making any // forward progress. if startDelta := n.commit - n.applied; startDelta > 0 { - for i := 0; i < 10; i++ { // 5ms, in 0.5ms increments + for i := 0; i < 10; i++ { // 10ms, in 1ms increments n.Unlock() time.Sleep(time.Millisecond) n.Lock() @@ -1480,9 +1547,16 @@ func (n *raft) UpdateKnownPeers(knownPeers []string) { } } +// ApplyQ returns the apply queue that new commits will be sent to for the +// upper layer to apply. func (n *raft) ApplyQ() *ipQueue[*CommittedEntry] { return n.apply } -func (n *raft) LeadChangeC() <-chan bool { return n.leadc } -func (n *raft) QuitC() <-chan struct{} { return n.quit } + +// LeadChangeC returns the leader change channel, notifying when the Raft +// leader role has moved. 
+func (n *raft) LeadChangeC() <-chan bool { return n.leadc } + +// QuitC returns the quit channel, notifying when the Raft group has shut down. +func (n *raft) QuitC() <-chan struct{} { return n.quit } func (n *raft) Created() time.Time { n.RLock() @@ -1679,6 +1753,10 @@ func (n *raft) resetElectWithLock(et time.Duration) { n.Unlock() } +// run is the top-level runner for the Raft state machine. Depending on the +// state of the node (leader, follower, candidate, observer), this will call +// through to other functions. It is expected that this function will run for +// the entire life of the Raft node once started. func (n *raft) run() { s := n.s defer s.grWG.Done() @@ -1715,9 +1793,6 @@ func (n *raft) run() { n.runAsCandidate() case Leader: n.runAsLeader() - case Observer: - // TODO(dlc) - fix. - n.runAsFollower() case Closed: return } @@ -1765,7 +1840,8 @@ func (n *raft) setObserver(isObserver bool, extSt extensionState) { n.extSt = extSt } -// Invoked when being notified that there is something in the entryc's queue +// processAppendEntries is called by the Raft state machine when there are +// new append entries to be committed and sent to the upper state machine. func (n *raft) processAppendEntries() { canProcess := true if n.isClosed() { @@ -1776,7 +1852,8 @@ func (n *raft) processAppendEntries() { n.debug("AppendEntry not processing inbound, no resources") canProcess = false } - // Always pop the entries, but check if we can process them. + // Always pop the entries, but check if we can process them. If we can't + // then the entries are effectively dropped. aes := n.entry.pop() if canProcess { for _, ae := range aes { @@ -1786,19 +1863,25 @@ func (n *raft) processAppendEntries() { n.entry.recycle(&aes) } +// runAsFollower is called by run and will block for as long as the node is +// running in the follower state. func (n *raft) runAsFollower() { - for n.State() == Follower { + for { elect := n.electTimer() select { case <-n.entry.ch: + // New append entries have arrived over the network. n.processAppendEntries() case <-n.s.quitCh: + // The server is shutting down. n.shutdown(false) return case <-n.quit: + // The Raft node is shutting down. return case <-elect.C: + // The election timer has fired so we think it's time to call an election. // If we are out of resources we just want to stay in this state for the moment. if n.outOfResources() { n.resetElectionTimeoutWithLock() @@ -1820,17 +1903,23 @@ func (n *raft) runAsFollower() { return } case <-n.votes.ch: + // We're receiving votes from the network, probably because we have only + // just stepped down and they were already in flight. Ignore them. n.debug("Ignoring old vote response, we have stepped down") n.votes.popOne() case <-n.resp.ch: - // Ignore + // We're receiving append entry responses from the network, probably because + // we have only just stepped down and they were already in flight. Ignore them. n.resp.popOne() case <-n.reqs.ch: + // We've just received a vote request from the network. // Because of drain() it is possible that we get nil from popOne(). if voteReq, ok := n.reqs.popOne(); ok { n.processVoteRequest(voteReq) } case <-n.stepdown.ch: + // We've received a stepdown request, start following the new leader if + // we can. if newLeader, ok := n.stepdown.popOne(); ok { n.switchToFollower(newLeader) return @@ -1839,26 +1928,29 @@ func (n *raft) runAsFollower() { } } -// Pool for CommitedEntry re-use. +// Pool for CommittedEntry re-use. 
var cePool = sync.Pool{ New: func() any { return &CommittedEntry{} }, } -// CommitEntry is handed back to the user to apply a commit to their FSM. +// CommittedEntry is handed back to the user to apply a commit to their upper layer. type CommittedEntry struct { Index uint64 Entries []*Entry } -// Create a new ComittedEntry. +// Create a new CommittedEntry. When the returned entry is no longer needed, it +// should be returned to the pool by calling ReturnToPool. func newCommittedEntry(index uint64, entries []*Entry) *CommittedEntry { ce := cePool.Get().(*CommittedEntry) ce.Index, ce.Entries = index, entries return ce } +// ReturnToPool returns the CommittedEntry to the pool, after which point it is +// no longer safe to reuse. func (ce *CommittedEntry) ReturnToPool() { if ce == nil { return @@ -1879,7 +1971,8 @@ var entryPool = sync.Pool{ }, } -// Helper to create new entries. +// Helper to create new entries. When the returned entry is no longer needed, it +// should be returned to the entryPool pool. func newEntry(t EntryType, data []byte) *Entry { entry := entryPool.Get().(*Entry) entry.Type, entry.Data = t, data @@ -1895,15 +1988,15 @@ var aePool = sync.Pool{ // appendEntry is the main struct that is used to sync raft peers. type appendEntry struct { - leader string - term uint64 - commit uint64 - pterm uint64 - pindex uint64 - entries []*Entry - // internal use only. - reply string - sub *subscription + leader string // The leader that this append entry came from. + term uint64 // The current term, as the leader understands it. + commit uint64 // The commit index, as the leader understands it. + pterm uint64 // The previous term, for checking consistency. + pindex uint64 // The previous commit index, for checking consistency. + entries []*Entry // Entries to process. + // Below fields are for internal use only: + reply string // Reply subject to respond to once committed. + sub *subscription // The subscription that the append entry came in on. buf []byte } @@ -2166,12 +2259,15 @@ func (n *raft) runAsLeader() { // For forwarded proposals, both normal and remove peer proposals. fsub, err := n.subscribe(psubj, n.handleForwardedProposal) if err != nil { - n.debug("Error subscribing to forwarded proposals: %v", err) + n.warn("Error subscribing to forwarded proposals: %v", err) + n.stepdown.push(noLeader) return } rpsub, err := n.subscribe(rpsubj, n.handleForwardedRemovePeerProposal) if err != nil { - n.debug("Error subscribing to forwarded proposals: %v", err) + n.warn("Error subscribing to forwarded remove peer proposals: %v", err) + n.unsubscribe(fsub) + n.stepdown.push(noLeader) return } @@ -2537,7 +2633,7 @@ func (n *raft) loadEntry(index uint64) (*appendEntry, error) { return n.decodeAppendEntry(sm.msg, nil, _EMPTY_) } -// applyCommit will update our commit index and apply the entry to the apply chan. +// applyCommit will update our commit index and apply the entry to the apply queue. // lock should be held. func (n *raft) applyCommit(index uint64) error { if n.State() == Closed { @@ -2779,7 +2875,7 @@ func (n *raft) runAsCandidate() { // We vote for ourselves. votes := 1 - for n.State() == Candidate { + for { elect := n.electTimer() select { case <-n.entry.ch: @@ -2840,16 +2936,22 @@ func (n *raft) runAsCandidate() { } } -// handleAppendEntry handles an append entry from the wire. +// handleAppendEntry handles an append entry from the wire. This function +// is an internal callback from the "asubj" append entry subscription. 
func (n *raft) handleAppendEntry(sub *subscription, c *client, _ *Account, subject, reply string, msg []byte) { msg = copyBytes(msg) if ae, err := n.decodeAppendEntry(msg, sub, reply); err == nil { + // Push to the new entry channel. From here one of the worker + // goroutines (runAsLeader, runAsFollower, runAsCandidate) will + // pick it up. n.entry.push(ae) } else { n.warn("AppendEntry failed to be placed on internal channel: corrupt entry") } } +// cancelCatchup will stop an in-flight catchup by unsubscribing from the +// catchup subscription. // Lock should be held. func (n *raft) cancelCatchup() { n.debug("Canceling catchup subscription since we are now up to date") @@ -2875,6 +2977,9 @@ func (n *raft) catchupStalled() bool { return false } +// createCatchup will create the state needed to track a catchup as it +// runs. It then creates a unique inbox for this catchup and subscribes +// to it. The remote side will stream entries to that subject. // Lock should be held. func (n *raft) createCatchup(ae *appendEntry) string { // Cleanup any old ones. @@ -2938,7 +3043,7 @@ func (n *raft) truncateWAL(term, index uint64) { n.term, n.pterm, n.pindex = term, term, index } -// Reset our WAL. +// Reset our WAL. This is equivalent to truncating all data from the log. // Lock should be held. func (n *raft) resetWAL() { n.truncateWAL(0, 0) @@ -2952,7 +3057,9 @@ func (n *raft) updateLeader(newLeader string) { } } -// processAppendEntry will process an appendEntry. +// processAppendEntry will process an appendEntry. This is called either +// during recovery or from processAppendEntries when there are new entries +// to be committed. func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { n.Lock() // Don't reset here if we have been asked to assume leader position. @@ -2991,7 +3098,9 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { return } - // If we received an append entry as a candidate we should convert to a follower. + // If we received an append entry as a candidate then it would appear that + // another node has taken on the leader role already, so we should convert + // to a follower of that node instead. if n.State() == Candidate { n.debug("Received append entry in candidate state from %q, converting to follower", ae.leader) if n.term < ae.term { @@ -3004,7 +3113,8 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { // Catching up state. catchingUp := n.catchup != nil - // Is this a new entry? + // Is this a new entry? New entries will be delivered on the append entry + // sub, rather than a catch-up sub. isNew := sub != nil && sub == n.aesub // Track leader directly @@ -3204,7 +3314,7 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { n.lxfer = true n.xferCampaign() } else if n.paused && !n.pobserver { - // Here we can become a leader but need to wait for resume of the apply channel. + // Here we can become a leader but need to wait for resume of the apply queue. n.lxfer = true } } else { @@ -3254,9 +3364,12 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { } } +// processPeerState is called when a peer state entry is received +// over the wire or when we're updating known peers. // Lock should be held. func (n *raft) processPeerState(ps *peerState) { - // Update our version of peers to that of the leader. + // Update our version of peers to that of the leader. Calculate + // the number of nodes needed to establish a quorum. 
n.csz = ps.clusterSize n.qn = n.csz/2 + 1 @@ -3274,15 +3387,19 @@ func (n *raft) processPeerState(ps *peerState) { n.writePeerState(ps) } -// Process a response. +// processAppendEntryResponse is called when we receive an append entry +// response from another node. They will send a confirmation to tell us +// whether they successfully committed the entry or not. func (n *raft) processAppendEntryResponse(ar *appendEntryResponse) { n.trackPeer(ar.peer) if ar.success { + // The remote node successfully committed the append entry. n.trackResponse(ar) arPool.Put(ar) } else if ar.term > n.term { - // False here and they have a higher term. + // The remote node didn't commit the append entry, it looks like + // they are on a newer term than we are. Step down. n.Lock() n.term = ar.term n.vote = noVote @@ -3293,6 +3410,8 @@ func (n *raft) processAppendEntryResponse(ar *appendEntryResponse) { n.Unlock() arPool.Put(ar) } else if ar.reply != _EMPTY_ { + // The remote node didn't commit the append entry and they are + // still on the same term, so let's try to catch them up. n.catchupFollower(ar) } } @@ -3308,7 +3427,8 @@ func (n *raft) buildAppendEntry(entries []*Entry) *appendEntry { return newAppendEntry(n.id, n.term, n.commit, n.pterm, n.pindex, entries) } -// Determine if we should store an entry. +// Determine if we should store an entry. This stops us from storing +// heartbeat messages. func (ae *appendEntry) shouldStore() bool { return ae != nil && len(ae.entries) > 0 } @@ -3621,6 +3741,7 @@ func (n *raft) fileWriter() { case <-n.quit: return case <-n.wtvch: + // We've been asked to write out the term-and-vote file. var buf [termVoteLen]byte n.RLock() copy(buf[0:], n.wtv) @@ -3633,6 +3754,7 @@ func (n *raft) fileWriter() { n.warn("Error writing term and vote file for %q: %v", n.group, err) } case <-n.wpsch: + // We've been asked to write out the peer state file. n.RLock() buf := copyBytes(n.wps) n.RUnlock() @@ -3656,7 +3778,7 @@ func (n *raft) writeTermVote() { copy(buf[8:], n.vote) b := buf[:8+len(n.vote)] - // If same as what we have we can ignore. + // If the term and vote hasn't changed then don't rewrite to disk. if bytes.Equal(n.wtv, b) { return } @@ -3734,7 +3856,8 @@ func (n *raft) processVoteRequest(vr *voteRequest) error { vresp := &voteResponse{n.term, n.id, false} defer n.debug("Sending a voteResponse %+v -> %q", vresp, vr.reply) - // Ignore if we are newer. + // Ignore if we are newer. This is important so that we don't accidentally process + // votes from a previous term if they were still in flight somewhere. 
if vr.term < n.term { n.Unlock() n.sendReply(vr.reply, vresp.encode()) diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stream.go b/vendor/github.com/nats-io/nats-server/v2/server/stream.go index ea34bf55e7..118e8a95ed 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stream.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stream.go @@ -239,6 +239,7 @@ type stream struct { ddindex int ddtmr *time.Timer qch chan struct{} + mqch chan struct{} active bool ddloaded bool closed bool @@ -558,6 +559,7 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt msgs: newIPQueue[*inMsg](s, qpfx+"messages"), gets: newIPQueue[*directGetReq](s, qpfx+"direct gets"), qch: make(chan struct{}), + mqch: make(chan struct{}), uch: make(chan struct{}, 4), sch: make(chan struct{}, 1), } @@ -785,6 +787,15 @@ func (mset *stream) setStreamAssignment(sa *streamAssignment) { } } +func (mset *stream) monitorQuitC() <-chan struct{} { + if mset == nil { + return nil + } + mset.mu.RLock() + defer mset.mu.RUnlock() + return mset.mqch +} + func (mset *stream) updateC() <-chan struct{} { if mset == nil { return nil @@ -985,14 +996,6 @@ func (mset *stream) lastSeqAndCLFS() (uint64, uint64) { return mset.lseq, mset.getCLFS() } -func (mset *stream) clearCLFS() uint64 { - mset.clMu.Lock() - defer mset.clMu.Unlock() - clfs := mset.clfs - mset.clfs, mset.clseq = 0, 0 - return clfs -} - func (mset *stream) getCLFS() uint64 { mset.clMu.Lock() defer mset.clMu.Unlock() @@ -4077,6 +4080,7 @@ func (mset *stream) processInboundJetStreamMsg(_ *subscription, c *client, _ *Ac var ( errLastSeqMismatch = errors.New("last sequence mismatch") errMsgIdDuplicate = errors.New("msgid is duplicate") + errStreamClosed = errors.New("stream closed") ) // processJetStreamMsg is where we try to actually process the stream msg. @@ -4085,7 +4089,7 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte, c, s, store := mset.client, mset.srv, mset.store if mset.closed || c == nil { mset.mu.Unlock() - return nil + return errStreamClosed } // Apply the input subject transform if any @@ -4415,7 +4419,6 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte, // Make sure to take into account any message assignments that we had to skip (clfs). seq = lseq + 1 - clfs // Check for preAcks and the need to skip vs store. - if mset.hasAllPreAcks(seq, subject) { mset.clearAllPreAcks(seq) store.SkipMsg() @@ -4907,9 +4910,28 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { accName := jsa.account.Name jsa.mu.Unlock() - // Clean up consumers. + // Mark as closed, kick monitor and collect consumers first. mset.mu.Lock() mset.closed = true + // Signal to the monitor loop. + // Can't use qch here. + if mset.mqch != nil { + close(mset.mqch) + mset.mqch = nil + } + + // Stop responding to sync requests. + mset.stopClusterSubs() + // Unsubscribe from direct stream. + mset.unsubscribeToStream(true) + + // Our info sub if we spun it up. + if mset.infoSub != nil { + mset.srv.sysUnsubscribe(mset.infoSub) + mset.infoSub = nil + } + + // Clean up consumers. var obs []*consumer for _, o := range mset.consumers { obs = append(obs, o) @@ -4930,21 +4952,6 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { mset.cancelSourceConsumer(si.iname) } } - - // Cluster cleanup - var sa *streamAssignment - if n := mset.node; n != nil { - if deleteFlag { - n.Delete() - sa = mset.sa - } else { - if n.NeedSnapshot() { - // Attempt snapshot on clean exit. 
-				n.InstallSnapshot(mset.stateSnapshotLocked())
-			}
-			n.Stop()
-		}
-	}
 	mset.mu.Unlock()
 
 	isShuttingDown := js.isShuttingDown()
@@ -4961,17 +4968,6 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 	}
 
 	mset.mu.Lock()
-	// Stop responding to sync requests.
-	mset.stopClusterSubs()
-	// Unsubscribe from direct stream.
-	mset.unsubscribeToStream(true)
-
-	// Our info sub if we spun it up.
-	if mset.infoSub != nil {
-		mset.srv.sysUnsubscribe(mset.infoSub)
-		mset.infoSub = nil
-	}
-
 	// Send stream delete advisory after the consumers.
 	if deleteFlag && advisory {
 		mset.sendDeleteAdvisoryLocked()
@@ -4983,11 +4979,17 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 		mset.qch = nil
 	}
 
-	c := mset.client
-	mset.client = nil
-	if c == nil {
-		mset.mu.Unlock()
-		return nil
+	// Cluster cleanup
+	var sa *streamAssignment
+	if n := mset.node; n != nil {
+		if deleteFlag {
+			n.Delete()
+			sa = mset.sa
+		} else {
+			// Always attempt snapshot on clean exit.
+			n.InstallSnapshot(mset.stateSnapshotLocked())
+			n.Stop()
+		}
 	}
 
 	// Cleanup duplicate timer if running.
@@ -5013,6 +5015,8 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 	// Snapshot store.
 	store := mset.store
+	c := mset.client
+	mset.client = nil
 
 	// Clustered cleanup.
 	mset.mu.Unlock()
@@ -5027,7 +5031,9 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 		js.mu.Unlock()
 	}
 
-	c.closeConnection(ClientClosed)
+	if c != nil {
+		c.closeConnection(ClientClosed)
+	}
 
 	if sysc != nil {
 		sysc.closeConnection(ClientClosed)
@@ -5042,9 +5048,12 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
 	js.releaseStreamResources(&mset.cfg)
 	// cleanup directories after the stream
 	accDir := filepath.Join(js.config.StoreDir, accName)
-	// no op if not empty
-	os.Remove(filepath.Join(accDir, streamsDir))
-	os.Remove(accDir)
+	// Do cleanup in a separate goroutine, similar to how fs will use purge here.
+	go func() {
+		// no op if not empty
+		os.Remove(filepath.Join(accDir, streamsDir))
+		os.Remove(accDir)
+	}()
 	} else if store != nil {
 		// Ignore errors.
store.Stop() diff --git a/vendor/github.com/nats-io/nats-server/v2/server/websocket.go b/vendor/github.com/nats-io/nats-server/v2/server/websocket.go index 014a1d72fc..0f45f91e33 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/websocket.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/websocket.go @@ -693,9 +693,9 @@ func (s *Server) wsUpgrade(w http.ResponseWriter, r *http.Request) (*wsUpgradeRe kind := CLIENT if r.URL != nil { ep := r.URL.EscapedPath() - if strings.HasPrefix(ep, leafNodeWSPath) { + if strings.HasSuffix(ep, leafNodeWSPath) { kind = LEAF - } else if strings.HasPrefix(ep, mqttWSPath) { + } else if strings.HasSuffix(ep, mqttWSPath) { kind = MQTT } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 67fd16bb81..130c2d0c5d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1371,10 +1371,10 @@ github.com/mohae/deepcopy # github.com/mschoch/smat v0.2.0 ## explicit; go 1.13 github.com/mschoch/smat -# github.com/nats-io/jwt/v2 v2.5.2 +# github.com/nats-io/jwt/v2 v2.5.3 ## explicit; go 1.18 github.com/nats-io/jwt/v2 -# github.com/nats-io/nats-server/v2 v2.10.4 +# github.com/nats-io/nats-server/v2 v2.10.5 ## explicit; go 1.20 github.com/nats-io/nats-server/v2/conf github.com/nats-io/nats-server/v2/internal/ldap @@ -2079,8 +2079,8 @@ golang.org/x/text/transform golang.org/x/text/unicode/bidi golang.org/x/text/unicode/norm golang.org/x/text/width -# golang.org/x/time v0.3.0 -## explicit +# golang.org/x/time v0.4.0 +## explicit; go 1.18 golang.org/x/time/rate # golang.org/x/tools v0.14.0 ## explicit; go 1.18