diff --git a/go.mod b/go.mod index 53ca56dcda..d29495ee3d 100644 --- a/go.mod +++ b/go.mod @@ -62,7 +62,7 @@ require ( github.com/mitchellh/mapstructure v1.5.0 github.com/mna/pigeon v1.2.1 github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 - github.com/nats-io/nats-server/v2 v2.10.16 + github.com/nats-io/nats-server/v2 v2.10.18 github.com/nats-io/nats.go v1.36.0 github.com/oklog/run v1.1.0 github.com/olekukonko/tablewriter v0.0.5 @@ -254,7 +254,7 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/juliangruber/go-intersect v1.1.0 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/klauspost/compress v1.17.8 // indirect + github.com/klauspost/compress v1.17.9 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/libregraph/oidc-go v1.1.0 // indirect @@ -269,7 +269,7 @@ require ( github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 // indirect github.com/miekg/dns v1.1.57 // indirect github.com/mileusna/useragent v1.3.4 // indirect - github.com/minio/highwayhash v1.0.2 // indirect + github.com/minio/highwayhash v1.0.3 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.66 // indirect github.com/minio/sha256-simd v1.0.1 // indirect @@ -281,7 +281,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mschoch/smat v0.2.0 // indirect - github.com/nats-io/jwt/v2 v2.5.7 // indirect + github.com/nats-io/jwt/v2 v2.5.8 // indirect github.com/nats-io/nkeys v0.4.7 // indirect github.com/nats-io/nuid v1.0.1 // indirect github.com/nxadm/tail v1.4.8 // indirect diff --git a/go.sum b/go.sum index 4c3ec138ef..3d36d288f0 100644 --- a/go.sum +++ b/go.sum @@ -755,8 +755,8 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= -github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -850,8 +850,8 @@ github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM= github.com/miekg/dns v1.1.57/go.mod h1:uqRjCRUuEAA6qsOiJvDd+CFo/vW+y5WR6SNmHE55hZk= github.com/mileusna/useragent v1.3.4 h1:MiuRRuvGjEie1+yZHO88UBYg8YBC/ddF6T7F56i3PCk= github.com/mileusna/useragent v1.3.4/go.mod h1:3d8TOmwL/5I8pJjyVDteHtgDGcefrFUX4ccGOMKNYYc= -github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= -github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= +github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q= +github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= 
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/minio-go/v7 v7.0.66 h1:bnTOXOHjOqv/gcMuiVbN9o2ngRItvqE774dG9nq0Dzw= @@ -896,10 +896,10 @@ github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOl github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8= -github.com/nats-io/jwt/v2 v2.5.7 h1:j5lH1fUXCnJnY8SsQeB/a/z9Azgu2bYIDvtPVNdxe2c= -github.com/nats-io/jwt/v2 v2.5.7/go.mod h1:ZdWS1nZa6WMZfFwwgpEaqBV8EPGVgOTDHN/wTbz0Y5A= -github.com/nats-io/nats-server/v2 v2.10.16 h1:2jXaiydp5oB/nAx/Ytf9fdCi9QN6ItIc9eehX8kwVV0= -github.com/nats-io/nats-server/v2 v2.10.16/go.mod h1:Pksi38H2+6xLe1vQx0/EA4bzetM0NqyIHcIbmgXSkIU= +github.com/nats-io/jwt/v2 v2.5.8 h1:uvdSzwWiEGWGXf+0Q+70qv6AQdvcvxrv9hPM0RiPamE= +github.com/nats-io/jwt/v2 v2.5.8/go.mod h1:ZdWS1nZa6WMZfFwwgpEaqBV8EPGVgOTDHN/wTbz0Y5A= +github.com/nats-io/nats-server/v2 v2.10.18 h1:tRdZmBuWKVAFYtayqlBB2BuCHNGAQPvoQIXOKwU3WSM= +github.com/nats-io/nats-server/v2 v2.10.18/go.mod h1:97Qyg7YydD8blKlR8yBsUlPlWyZKjA7Bp5cl3MUE9K8= github.com/nats-io/nats.go v1.36.0 h1:suEUPuWzTSse/XhESwqLxXGuj8vGRuPRoG7MoRN/qyU= github.com/nats-io/nats.go v1.36.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8= github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI= @@ -1438,7 +1438,6 @@ golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1516,6 +1515,7 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s index 9a7655c0f7..0782b86e3d 100644 --- a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -5,7 +5,6 @@ 
#include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/klauspost/compress/s2/decode_arm64.s b/vendor/github.com/klauspost/compress/s2/decode_arm64.s index 4b63d5086a..78e463f342 100644 --- a/vendor/github.com/klauspost/compress/s2/decode_arm64.s +++ b/vendor/github.com/klauspost/compress/s2/decode_arm64.s @@ -60,7 +60,7 @@ // // The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST. // The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC. -TEXT ·s2Decode(SB), NOSPLIT, $56-64 +TEXT ·s2Decode(SB), NOSPLIT, $56-56 // Initialize R_SRC, R_DST and R_DBASE-R_SEND. MOVD dst_base+0(FP), R_DBASE MOVD dst_len+8(FP), R_DLEN diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go index 18a4f7acd6..4229957b96 100644 --- a/vendor/github.com/klauspost/compress/s2/index.go +++ b/vendor/github.com/klauspost/compress/s2/index.go @@ -17,6 +17,8 @@ const ( S2IndexHeader = "s2idx\x00" S2IndexTrailer = "\x00xdi2s" maxIndexEntries = 1 << 16 + // If distance is less than this, we do not add the entry. + minIndexDist = 1 << 20 ) // Index represents an S2/Snappy index. @@ -72,6 +74,10 @@ func (i *Index) add(compressedOffset, uncompressedOffset int64) error { if latest.compressedOffset > compressedOffset { return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset) } + if latest.uncompressedOffset+minIndexDist > uncompressedOffset { + // Only add entry if distance is large enough. + return nil + } } i.info = append(i.info, struct { compressedOffset int64 @@ -122,7 +128,7 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er // reduce to stay below maxIndexEntries func (i *Index) reduce() { - if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 { + if len(i.info) < maxIndexEntries && i.estBlockUncomp >= minIndexDist { return } @@ -132,7 +138,7 @@ func (i *Index) reduce() { j := 0 // Each block should be at least 1MB, but don't reduce below 1000 entries. - for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 { + for i.estBlockUncomp*(int64(removeN)+1) < minIndexDist && len(i.info)/(removeN+1) > 1000 { removeN++ } for idx := 0; idx < len(src); idx++ { diff --git a/vendor/github.com/klauspost/compress/s2/s2.go b/vendor/github.com/klauspost/compress/s2/s2.go index 72bcb49453..cbd1ed64d6 100644 --- a/vendor/github.com/klauspost/compress/s2/s2.go +++ b/vendor/github.com/klauspost/compress/s2/s2.go @@ -109,7 +109,11 @@ const ( chunkTypeStreamIdentifier = 0xff ) -var crcTable = crc32.MakeTable(crc32.Castagnoli) +var ( + crcTable = crc32.MakeTable(crc32.Castagnoli) + magicChunkSnappyBytes = []byte(magicChunkSnappy) // Can be passed to functions where it escapes. + magicChunkBytes = []byte(magicChunk) // Can be passed to functions where it escapes. 
+) // crc implements the checksum specified in section 3 of // https://github.com/google/snappy/blob/master/framing_format.txt diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 637c931474..0a46f2b984 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -239,6 +239,9 @@ func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) { } } if n2 == 0 { + if cap(inbuf) >= w.obufLen { + w.buffers.Put(inbuf) + } break } n += int64(n2) @@ -314,9 +317,9 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -370,9 +373,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -478,9 +481,9 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -560,6 +563,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) { if w.concurrency == 1 { _, err := w.writeSync(inbuf[obufHeaderLen:]) + if cap(inbuf) >= w.obufLen { + w.buffers.Put(inbuf) + } return err } @@ -569,9 +575,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) { hWriter := make(chan result) w.output <- hWriter if w.snappy { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes} } else { - hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)} + hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } @@ -637,9 +643,9 @@ func (w *Writer) writeSync(p []byte) (nRet int, errRet error) { var n int var err error if w.snappy { - n, err = w.writer.Write([]byte(magicChunkSnappy)) + n, err = w.writer.Write(magicChunkSnappyBytes) } else { - n, err = w.writer.Write([]byte(magicChunk)) + n, err = w.writer.Write(magicChunkBytes) } if err != nil { return 0, w.err(err) diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index 8d5567fe64..b7b83164bc 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { enc.Encode(&block, b) addValues(&remain, block.literals) litTotal += len(block.literals) + if len(block.sequences) == 0 { + continue + } seqs += len(block.sequences) block.genCodes() addHist(&ll, block.coders.llEnc.Histogram()) @@ -286,6 +289,9 @@ 
func BuildDict(o BuildDictOptions) ([]byte, error) { if offset == 0 { continue } + if int(offset) >= len(o.History) { + continue + } if offset > 3 { newOffsets[offset-3]++ } else { @@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { if seqs/nUsed < 512 { // Use 512 as minimum. nUsed = seqs / 512 + if nUsed == 0 { + nUsed = 1 + } } copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { hist := dst.Histogram() @@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) { fakeLength += v hist[i] = uint32(v) } + + // Ensure we aren't trying to represent RLE. + if maxCount == fakeLength { + for i := range hist { + if uint8(i) == maxSym { + fakeLength++ + maxSym++ + hist[i+1] = 1 + if maxSym > 1 { + break + } + } + if hist[0] == 0 { + fakeLength++ + hist[i] = 1 + if maxSym > 1 { + break + } + } + } + } + dst.HistogramFinished(maxSym, maxCount) dst.reUsed = false dst.useRLE = false diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 17901e0804..ae7d4d3295 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -162,12 +162,12 @@ finalize: MOVD h, ret+24(FP) RET -// func writeBlocks(d *Digest, b []byte) int +// func writeBlocks(s *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. - MOVD d+0(FP), digest + MOVD s+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s index 9a7655c0f7..0782b86e3d 100644 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/minio/highwayhash/.golangci.yml b/vendor/github.com/minio/highwayhash/.golangci.yml index 16a72da1a1..39310d0d4a 100644 --- a/vendor/github.com/minio/highwayhash/.golangci.yml +++ b/vendor/github.com/minio/highwayhash/.golangci.yml @@ -12,13 +12,11 @@ linters: - goimports - misspell - govet - - golint + - revive - ineffassign - gosimple - - deadcode - unparam - unused - - structcheck issues: exclude-use-default: false @@ -27,4 +25,4 @@ issues: - error strings should not be capitalized or end with punctuation or a newline - should have comment # TODO(aead): Remove once all exported ident. have comments! 
service: - golangci-lint-version: 1.20.0 # use the fixed version to not introduce new linters unexpectedly + golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly diff --git a/vendor/github.com/minio/highwayhash/README.md b/vendor/github.com/minio/highwayhash/README.md index 9bec7edf5d..0504822c89 100644 --- a/vendor/github.com/minio/highwayhash/README.md +++ b/vendor/github.com/minio/highwayhash/README.md @@ -42,17 +42,17 @@ So for moderately sized messages it tops out at about 15 GB/sec. Also for small ### ARM Performance -Below are the single core results on an EC2 m6g.4xlarge (Graviton2) instance for 256 bit outputs: +Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs: ``` -BenchmarkSum256_16 96.82 MB/s -BenchmarkSum256_64 445.35 MB/s -BenchmarkSum256_1K 2782.46 MB/s -BenchmarkSum256_8K 4083.58 MB/s -BenchmarkSum256_1M 4986.41 MB/s -BenchmarkSum256_5M 4992.72 MB/s -BenchmarkSum256_10M 4993.32 MB/s -BenchmarkSum256_25M 4992.55 MB/s +BenchmarkSum256_16 143.66 MB/s +BenchmarkSum256_64 628.75 MB/s +BenchmarkSum256_1K 3621.71 MB/s +BenchmarkSum256_8K 5039.64 MB/s +BenchmarkSum256_1M 5279.79 MB/s +BenchmarkSum256_5M 5474.60 MB/s +BenchmarkSum256_10M 5621.73 MB/s +BenchmarkSum256_25M 5250.47 MB/s ``` ### ppc64le Performance diff --git a/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s b/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s new file mode 100644 index 0000000000..e9b6eb0615 --- /dev/null +++ b/vendor/github.com/minio/highwayhash/highwayhashSVE_arm64.s @@ -0,0 +1,132 @@ +// +// Copyright (c) 2024 Minio Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +//+build !noasm,!appengine + +#include "textflag.h" + +TEXT ·getVectorLength(SB), NOSPLIT, $0 + WORD $0xd2800002 // mov x2, #0 + WORD $0x04225022 // addvl x2, x2, #1 + WORD $0xd37df042 // lsl x2, x2, #3 + WORD $0xd2800003 // mov x3, #0 + WORD $0x04635023 // addpl x3, x3, #1 + WORD $0xd37df063 // lsl x3, x3, #3 + MOVD R2, vl+0(FP) + MOVD R3, pl+8(FP) + RET + +TEXT ·updateArm64Sve(SB), NOSPLIT, $0 + MOVD state+0(FP), R0 + MOVD msg_base+8(FP), R1 + MOVD msg_len+16(FP), R2 // length of message + SUBS $32, R2 + BMI completeSve + + WORD $0x2518e3e1 // ptrue p1.b + WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0] + WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL] + WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL] + WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL] + + // Load zipper merge constants table pointer + MOVD $·zipperMergeSve(SB), R3 + WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3] + WORD $0x25b8c006 // mov z6.s, #0 + WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */ + +loopSve: + WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1] + ADD $32, R1 + + WORD $0x04e00042 // add z2.d, z2.d, z0.d + WORD $0x04e30042 // add z2.d, z2.d, z3.d + WORD $0x04e09420 // lsr z0.d, z1.d, #32 + WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s + WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d + WORD $0x04a33003 // eor z3.d, z0.d, z3.d + WORD $0x04e10081 // add z1.d, z4.d, z1.d + WORD $0x04e09440 // lsr z0.d, z2.d, #32 + WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s + WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d + WORD $0x04a43004 // eor z4.d, z0.d, z4.d + WORD $0x05253040 // tbl z0.b, z2.b, z5.b + WORD $0x04e00021 // add z1.d, z1.d, z0.d + WORD $0x05253020 // tbl z0.b, z1.b, z5.b + WORD $0x04e00042 // add z2.d, z2.d, z0.d + + SUBS $32, R2 + BPL loopSve + + WORD $0xe5e0e401 // st1d z1.d, p1, [x0] + WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] + WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] + WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] + +completeSve: + RET + +TEXT ·updateArm64Sve2(SB), NOSPLIT, $0 + MOVD state+0(FP), R0 + MOVD msg_base+8(FP), R1 + MOVD msg_len+16(FP), R2 // length of message + SUBS $32, R2 + BMI completeSve2 + + WORD $0x2518e3e1 // ptrue p1.b + WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0] + WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL] + WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL] + WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL] + + // Load zipper merge constants table pointer + MOVD $·zipperMergeSve(SB), R3 + WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3] + +loopSve2: + WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1] + ADD $32, R1 + + WORD $0x04e00042 // add z2.d, z2.d, z0.d + WORD $0x04e30042 // add z2.d, z2.d, z3.d + WORD $0x04e09420 // lsr z0.d, z1.d, #32 + WORD $0x45c27800 // umullb z0.d, z0.s, z2.s + WORD $0x04a33003 // eor z3.d, z0.d, z3.d + WORD $0x04e10081 // add z1.d, z4.d, z1.d + WORD $0x04e09440 // lsr z0.d, z2.d, #32 + WORD $0x45c17800 // umullb z0.d, z0.s, z1.s + WORD $0x04a43004 // eor z4.d, z0.d, z4.d + WORD $0x05253040 // tbl z0.b, z2.b, z5.b + WORD $0x04e00021 // add z1.d, z1.d, z0.d + WORD $0x05253020 // tbl z0.b, z1.b, z5.b + WORD $0x04e00042 // add z2.d, z2.d, z0.d + + SUBS $32, R2 + BPL loopSve2 + + WORD $0xe5e0e401 // st1d z1.d, p1, [x0] + WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL] + WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL] + WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL] + +completeSve2: + RET + +DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03 +DATA ·zipperMergeSve+0x08(SB)/8, 
$0x070806090d0a040b
+DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
+DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
+GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32
diff --git a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go
index 5e64cc3b45..b7717836eb 100644
--- a/vendor/github.com/minio/highwayhash/highwayhash_amd64.go
+++ b/vendor/github.com/minio/highwayhash/highwayhash_amd64.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a license that can be
 // found in the LICENSE file.
 
+//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
 // +build amd64,!gccgo,!appengine,!nacl,!noasm
 
 package highwayhash
@@ -12,6 +13,8 @@ var (
 	useSSE4 = cpu.X86.HasSSE41
 	useAVX2 = cpu.X86.HasAVX2
 	useNEON = false
+	useSVE  = false
+	useSVE2 = false
 	useVMX  = false
 )
 
diff --git a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go
index 27935d705e..d94e482d2d 100644
--- a/vendor/github.com/minio/highwayhash/highwayhash_arm64.go
+++ b/vendor/github.com/minio/highwayhash/highwayhash_arm64.go
@@ -1,24 +1,54 @@
-// Copyright (c) 2017 Minio Inc. All rights reserved.
+// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
 // Use of this source code is governed by a license that can be
 // found in the LICENSE file.
 
-//+build !noasm,!appengine
+//go:build !noasm && !appengine
+// +build !noasm,!appengine
 
 package highwayhash
 
+import (
+	"golang.org/x/sys/cpu"
+)
+
 var (
 	useSSE4 = false
 	useAVX2 = false
-	useNEON = true
+	useNEON = cpu.ARM64.HasASIMD
+	useSVE  = cpu.ARM64.HasSVE
+	useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
 	useVMX  = false
 )
 
+func init() {
+	if useSVE {
+		if vl, _ := getVectorLength(); vl != 256 {
+			//
+			// Since HighwayHash is designed for AVX2,
+			// SVE/SVE2 instructions only run correctly
+			// for a vector length of 256
+			//
+			useSVE2 = false
+			useSVE = false
+		}
+	}
+}
+
 //go:noescape
 func initializeArm64(state *[16]uint64, key []byte)
 
 //go:noescape
 func updateArm64(state *[16]uint64, msg []byte)
 
+//go:noescape
+func getVectorLength() (vl, pl uint64)
+
+//go:noescape
+func updateArm64Sve(state *[16]uint64, msg []byte)
+
+//go:noescape
+func updateArm64Sve2(state *[16]uint64, msg []byte)
+
 //go:noescape
 func finalizeArm64(out []byte, state *[16]uint64)
 
@@ -31,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
 }
 
 func update(state *[16]uint64, msg []byte) {
-	if useNEON {
+	if useSVE2 {
+		updateArm64Sve2(state, msg)
+	} else if useSVE {
+		updateArm64Sve(state, msg)
+	} else if useNEON {
 		updateArm64(state, msg)
 	} else {
 		updateGeneric(state, msg)
diff --git a/vendor/github.com/minio/highwayhash/highwayhash_generic.go b/vendor/github.com/minio/highwayhash/highwayhash_generic.go
index 3909e79139..1f66e223ed 100644
--- a/vendor/github.com/minio/highwayhash/highwayhash_generic.go
+++ b/vendor/github.com/minio/highwayhash/highwayhash_generic.go
@@ -46,40 +46,113 @@ func initializeGeneric(state *[16]uint64, k []byte) {
 }
 
 func updateGeneric(state *[16]uint64, msg []byte) {
-	for len(msg) > 0 {
-		// add message
-		state[v1+0] += binary.LittleEndian.Uint64(msg)
-		state[v1+1] += binary.LittleEndian.Uint64(msg[8:])
-		state[v1+2] += binary.LittleEndian.Uint64(msg[16:])
-		state[v1+3] += binary.LittleEndian.Uint64(msg[24:])
-
-		// v1 += mul0
-		state[v1+0] += state[mul0+0]
-		state[v1+1] += state[mul0+1]
-		state[v1+2] += state[mul0+2]
-		state[v1+3] += state[mul0+3]
+	for len(msg) >= 32 {
+		m := msg[:32]
+		// add message +
mul0 + // Interleave operations to hide multiplication + state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0] state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32) - state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) - state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) - state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) - - // v0 += mul1 state[v0+0] += state[mul1+0] - state[v0+1] += state[mul1+1] - state[v0+2] += state[mul1+2] - state[v0+3] += state[mul1+3] - state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32) + + state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1] + state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32) + state[v0+1] += state[mul1+1] state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32) + + state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2] + state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32) + state[v0+2] += state[mul1+2] state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32) + + state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3] + state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32) + state[v0+3] += state[mul1+3] state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32) - zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) - zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) + // inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1]) + { + val0 := state[v1+0] + val1 := state[v1+1] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 - zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) - zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) + state[v0+0] += res + state[v0+1] += res2 + } + // zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3]) + { + val0 := state[v1+2] + val1 := state[v1+3] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v0+2] += res + state[v0+3] += res2 + } + + // inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1]) + { + val0 := state[v0+0] + val1 := state[v0+1] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) 
+ (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v1+0] += res + state[v1+1] += res2 + } + + //inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3]) + { + val0 := state[v0+2] + val1 := state[v0+3] + res := val0 & (0xff << (2 * 8)) + res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8))) + res += (val1 & (0xff << (7 * 8))) >> 8 + res2 += (val0 & (0xff << (6 * 8))) >> 8 + res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16 + res2 += (val1 & (0xff << (5 * 8))) >> 16 + res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24 + res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24 + res += (val0 & (0xff << (1 * 8))) << 32 + res2 += (val1 & 0xff) << 48 + res += val0 << 56 + res2 += (val1 & (0xff << (1 * 8))) << 24 + + state[v1+2] += res + state[v1+3] += res2 + } msg = msg[32:] } } @@ -124,25 +197,129 @@ func finalizeGeneric(out []byte, state *[16]uint64) { } } +// Experiments on variations left for future reference... +/* func zipperMerge(v0, v1 uint64, d0, d1 *uint64) { - m0 := v0 & (0xFF << (2 * 8)) - m1 := (v1 & (0xFF << (7 * 8))) >> 8 - m2 := ((v0 & (0xFF << (5 * 8))) + (v1 & (0xFF << (6 * 8)))) >> 16 - m3 := ((v0 & (0xFF << (3 * 8))) + (v1 & (0xFF << (4 * 8)))) >> 24 - m4 := (v0 & (0xFF << (1 * 8))) << 32 - m5 := v0 << 56 + if true { + // fastest. original interleaved... + res := v0 & (0xff << (2 * 8)) + res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res += (v1 & (0xff << (7 * 8))) >> 8 + res2 += (v0 & (0xff << (6 * 8))) >> 8 + res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res2 += (v1 & (0xff << (5 * 8))) >> 16 + res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res += (v0 & (0xff << (1 * 8))) << 32 + res2 += (v1 & 0xff) << 48 + res += v0 << 56 + res2 += (v1 & (0xff << (1 * 8))) << 24 - *d0 += m0 + m1 + m2 + m3 + m4 + m5 + *d0 += res + *d1 += res2 + } else if false { + // Reading bytes and combining into uint64 + var v0b [8]byte + binary.LittleEndian.PutUint64(v0b[:], v0) + var v1b [8]byte + binary.LittleEndian.PutUint64(v1b[:], v1) + var res, res2 uint64 - m0 = (v0 & (0xFF << (7 * 8))) + (v1 & (0xFF << (2 * 8))) - m1 = (v0 & (0xFF << (6 * 8))) >> 8 - m2 = (v1 & (0xFF << (5 * 8))) >> 16 - m3 = ((v1 & (0xFF << (3 * 8))) + (v0 & (0xFF << (4 * 8)))) >> 24 - m4 = (v1 & 0xFF) << 48 - m5 = (v1 & (0xFF << (1 * 8))) << 24 + res = uint64(v0b[0]) << (7 * 8) + res2 = uint64(v1b[0]) << (6 * 8) + res |= uint64(v0b[1]) << (5 * 8) + res2 |= uint64(v1b[1]) << (4 * 8) + res |= uint64(v0b[2]) << (2 * 8) + res2 |= uint64(v1b[2]) << (2 * 8) + res |= uint64(v0b[3]) + res2 |= uint64(v0b[4]) << (1 * 8) + res |= uint64(v0b[5]) << (3 * 8) + res2 |= uint64(v0b[6]) << (5 * 8) + res |= uint64(v1b[4]) << (1 * 8) + res2 |= uint64(v0b[7]) << (7 * 8) + res |= uint64(v1b[6]) << (4 * 8) + res2 |= uint64(v1b[3]) + res |= uint64(v1b[7]) << (6 * 8) + res2 |= uint64(v1b[5]) << (3 * 8) - *d1 += m3 + m2 + m5 + m1 + m4 + m0 + *d0 += res + *d1 += res2 + + } else if false { + // bytes to bytes shuffle + var v0b [8]byte + binary.LittleEndian.PutUint64(v0b[:], v0) + var v1b [8]byte + binary.LittleEndian.PutUint64(v1b[:], v1) + var res [8]byte + + //res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res[0] = 
v0b[3] + res[1] = v1b[4] + + // res := v0 & (0xff << (2 * 8)) + res[2] = v0b[2] + + //res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res[3] = v0b[5] + res[4] = v1b[6] + + //res += (v0 & (0xff << (1 * 8))) << 32 + res[5] = v0b[1] + + //res += (v1 & (0xff << (7 * 8))) >> 8 + res[6] += v1b[7] + + //res += v0 << 56 + res[7] = v0b[0] + v0 = binary.LittleEndian.Uint64(res[:]) + *d0 += v0 + + //res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res[0] = v1b[3] + res[1] = v0b[4] + + res[2] = v1b[2] + + // res += (v1 & (0xff << (5 * 8))) >> 16 + res[3] = v1b[5] + + //res += (v1 & (0xff << (1 * 8))) << 24 + res[4] = v1b[1] + + // res += (v0 & (0xff << (6 * 8))) >> 8 + res[5] = v0b[6] + + //res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res[7] = v0b[7] + + //res += (v1 & 0xff) << 48 + res[6] = v1b[0] + + v0 = binary.LittleEndian.Uint64(res[:]) + *d1 += v0 + } else { + // original. + res := v0 & (0xff << (2 * 8)) + res += (v1 & (0xff << (7 * 8))) >> 8 + res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16 + res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24 + res += (v0 & (0xff << (1 * 8))) << 32 + res += v0 << 56 + + *d0 += res + + res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8))) + res += (v0 & (0xff << (6 * 8))) >> 8 + res += (v1 & (0xff << (5 * 8))) >> 16 + res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24 + res += (v1 & 0xff) << 48 + res += (v1 & (0xff << (1 * 8))) << 24 + + *d1 += res + } } +*/ // reduce v = [v0, v1, v2, v3] mod the irreducible polynomial x^128 + x^2 + x func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) { diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go index a988c74e60..cf9ee1a262 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_ppc64le.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a license that can be // found in the LICENSE file. -//+build !noasm,!appengine +//go:build !noasm && !appengine +// +build !noasm,!appengine package highwayhash @@ -10,6 +11,8 @@ var ( useSSE4 = false useAVX2 = false useNEON = false + useSVE = false + useSVE2 = false useVMX = true ) diff --git a/vendor/github.com/minio/highwayhash/highwayhash_ref.go b/vendor/github.com/minio/highwayhash/highwayhash_ref.go index e70a94779b..42cbbb4c44 100644 --- a/vendor/github.com/minio/highwayhash/highwayhash_ref.go +++ b/vendor/github.com/minio/highwayhash/highwayhash_ref.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a license that can be // found in the LICENSE file. 
+//go:build noasm || (!amd64 && !arm64 && !ppc64le) // +build noasm !amd64,!arm64,!ppc64le package highwayhash @@ -10,6 +11,8 @@ var ( useSSE4 = false useAVX2 = false useNEON = false + useSVE = false + useSVE2 = false useVMX = false ) diff --git a/vendor/github.com/nats-io/jwt/v2/account_claims.go b/vendor/github.com/nats-io/jwt/v2/account_claims.go index 5c1665d5eb..fa8fc5851e 100644 --- a/vendor/github.com/nats-io/jwt/v2/account_claims.go +++ b/vendor/github.com/nats-io/jwt/v2/account_claims.go @@ -152,10 +152,20 @@ type Mapping map[Subject][]WeightedMapping func (m *Mapping) Validate(vr *ValidationResults) { for ubFrom, wm := range (map[Subject][]WeightedMapping)(*m) { ubFrom.Validate(vr) + perCluster := make(map[string]uint8) total := uint8(0) - for _, wm := range wm { - wm.Subject.Validate(vr) - total += wm.GetWeight() + for _, e := range wm { + e.Subject.Validate(vr) + if e.Cluster != "" { + t := perCluster[e.Cluster] + t += e.Weight + perCluster[e.Cluster] = t + if t > 100 { + vr.AddError("Mapping %q in cluster %q exceeds 100%% among all of it's weighted to mappings", ubFrom, e.Cluster) + } + } else { + total += e.GetWeight() + } } if total > 100 { vr.AddError("Mapping %q exceeds 100%% among all of it's weighted to mappings", ubFrom) diff --git a/vendor/github.com/nats-io/nats-server/v2/server/accounts.go b/vendor/github.com/nats-io/nats-server/v2/server/accounts.go index 2f80b60621..4b24903a0d 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/accounts.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/accounts.go @@ -96,6 +96,9 @@ type Account struct { nameTag string lastLimErr int64 routePoolIdx int + // Guarantee that only one goroutine can be running either checkJetStreamMigrate + // or clearObserverState at a given time for this account to prevent interleaving. + jscmMu sync.Mutex } const ( @@ -1479,6 +1482,10 @@ func (a *Account) addServiceImportWithClaim(destination *Account, from, to strin return err } + if err := a.serviceImportFormsCycle(destination, to); err != nil { + return err + } + _, err := a.addServiceImport(destination, from, to, imClaim) return err @@ -2466,6 +2473,10 @@ func (a *Account) AddMappedStreamImportWithClaim(account *Account, from, to stri return err } + if err := a.streamImportFormsCycle(account, from); err != nil { + return err + } + var ( usePub bool tr *subjectTransform @@ -2811,9 +2822,12 @@ func (a *Account) isIssuerClaimTrusted(claims *jwt.ActivationClaims) bool { // check is done with the account's name, not the pointer. This is used // during config reload where we are comparing current and new config // in which pointers are different. -// No lock is acquired in this function, so it is assumed that the -// import maps are not changed while this executes. +// Acquires `a` read lock, but `b` is assumed to not be accessed +// by anyone but the caller (`b` is not registered anywhere). func (a *Account) checkStreamImportsEqual(b *Account) bool { + a.mu.RLock() + defer a.mu.RUnlock() + if len(a.imports.streams) != len(b.imports.streams) { return false } @@ -3181,6 +3195,9 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim a.nameTag = ac.Name a.tags = ac.Tags + // Grab trace label under lock. + tl := a.traceLabel() + // Check for external authorization. 
if ac.HasExternalAuthorization() { a.extAuth = &jwt.ExternalAuthorization{} @@ -3201,10 +3218,10 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim } if a.imports.services != nil { old.imports.services = make(map[string]*serviceImport, len(a.imports.services)) - } - for k, v := range a.imports.services { - old.imports.services[k] = v - delete(a.imports.services, k) + for k, v := range a.imports.services { + old.imports.services[k] = v + delete(a.imports.services, k) + } } alteredScope := map[string]struct{}{} @@ -3274,13 +3291,13 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim for _, e := range ac.Exports { switch e.Type { case jwt.Stream: - s.Debugf("Adding stream export %q for %s", e.Subject, a.traceLabel()) + s.Debugf("Adding stream export %q for %s", e.Subject, tl) if err := a.addStreamExportWithAccountPos( string(e.Subject), authAccounts(e.TokenReq), e.AccountTokenPosition); err != nil { - s.Debugf("Error adding stream export to account [%s]: %v", a.traceLabel(), err.Error()) + s.Debugf("Error adding stream export to account [%s]: %v", tl, err.Error()) } case jwt.Service: - s.Debugf("Adding service export %q for %s", e.Subject, a.traceLabel()) + s.Debugf("Adding service export %q for %s", e.Subject, tl) rt := Singleton switch e.ResponseType { case jwt.ResponseTypeStream: @@ -3290,7 +3307,7 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim } if err := a.addServiceExportWithResponseAndAccountPos( string(e.Subject), rt, authAccounts(e.TokenReq), e.AccountTokenPosition); err != nil { - s.Debugf("Error adding service export to account [%s]: %v", a.traceLabel(), err) + s.Debugf("Error adding service export to account [%s]: %v", tl, err) continue } sub := string(e.Subject) @@ -3300,13 +3317,13 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim if e.Latency.Sampling == jwt.Headers { hdrNote = " (using headers)" } - s.Debugf("Error adding latency tracking%s for service export to account [%s]: %v", hdrNote, a.traceLabel(), err) + s.Debugf("Error adding latency tracking%s for service export to account [%s]: %v", hdrNote, tl, err) } } if e.ResponseThreshold != 0 { // Response threshold was set in options. if err := a.SetServiceExportResponseThreshold(sub, e.ResponseThreshold); err != nil { - s.Debugf("Error adding service export response threshold for [%s]: %v", a.traceLabel(), err) + s.Debugf("Error adding service export response threshold for [%s]: %v", tl, err) } } } @@ -3351,34 +3368,31 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim } var incompleteImports []*jwt.Import for _, i := range ac.Imports { - // check tmpAccounts with priority - var acc *Account - var err error - if v, ok := s.tmpAccounts.Load(i.Account); ok { - acc = v.(*Account) - } else { - acc, err = s.lookupAccount(i.Account) - } + acc, err := s.lookupAccount(i.Account) if acc == nil || err != nil { s.Errorf("Can't locate account [%s] for import of [%v] %s (err=%v)", i.Account, i.Subject, i.Type, err) incompleteImports = append(incompleteImports, i) continue } - from := string(i.Subject) - to := i.GetTo() + // Capture trace labels. 
+ acc.mu.RLock() + atl := acc.traceLabel() + acc.mu.RUnlock() + // Grab from and to + from, to := string(i.Subject), i.GetTo() switch i.Type { case jwt.Stream: if i.LocalSubject != _EMPTY_ { // set local subject implies to is empty to = string(i.LocalSubject) - s.Debugf("Adding stream import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to) + s.Debugf("Adding stream import %s:%q for %s:%q", atl, from, tl, to) err = a.AddMappedStreamImportWithClaim(acc, from, to, i) } else { - s.Debugf("Adding stream import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to) + s.Debugf("Adding stream import %s:%q for %s:%q", atl, from, tl, to) err = a.AddStreamImportWithClaim(acc, from, to, i) } if err != nil { - s.Debugf("Error adding stream import to account [%s]: %v", a.traceLabel(), err.Error()) + s.Debugf("Error adding stream import to account [%s]: %v", tl, err.Error()) incompleteImports = append(incompleteImports, i) } case jwt.Service: @@ -3386,9 +3400,9 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim from = string(i.LocalSubject) to = string(i.Subject) } - s.Debugf("Adding service import %s:%q for %s:%q", acc.traceLabel(), from, a.traceLabel(), to) + s.Debugf("Adding service import %s:%q for %s:%q", atl, from, tl, to) if err := a.AddServiceImportWithClaim(acc, from, to, i); err != nil { - s.Debugf("Error adding service import to account [%s]: %v", a.traceLabel(), err.Error()) + s.Debugf("Error adding service import to account [%s]: %v", tl, err.Error()) incompleteImports = append(incompleteImports, i) } } @@ -3559,7 +3573,7 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim // regardless of enabled or disabled. It handles both cases. if jsEnabled { if err := s.configJetStream(a); err != nil { - s.Errorf("Error configuring jetstream for account [%s]: %v", a.traceLabel(), err.Error()) + s.Errorf("Error configuring jetstream for account [%s]: %v", tl, err.Error()) a.mu.Lock() // Absent reload of js server cfg, this is going to be broken until js is disabled a.incomplete = true @@ -3582,6 +3596,14 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim } c.mu.Lock() c.applyAccountLimits() + // if we have an nkey user we are a callout user - save + // the issuedAt, and nkey user id to honor revocations + var nkeyUserID string + var issuedAt int64 + if c.user != nil { + issuedAt = c.user.Issued + nkeyUserID = c.user.Nkey + } theJWT := c.opts.JWT c.mu.Unlock() // Check for being revoked here. We use ac one to avoid the account lock. 
@@ -3600,6 +3622,27 @@ func (s *Server) updateAccountClaimsWithRefresh(a *Account, ac *jwt.AccountClaim continue } } + + // if we extracted nkeyUserID and issuedAt we are a callout type + // calloutIAT should only be set if we are in callout scenario as + // the user JWT is _NOT_ associated with the client for callouts, + // so we rely on the calloutIAT to know when the JWT was issued + // revocations simply state that JWT issued before or by that date + // are not valid + if ac.Revocations != nil && nkeyUserID != _EMPTY_ && issuedAt > 0 { + seconds, ok := ac.Revocations[jwt.All] + if ok && seconds >= issuedAt { + c.sendErrAndDebug("User Authentication Revoked") + c.closeConnection(Revocation) + continue + } + seconds, ok = ac.Revocations[nkeyUserID] + if ok && seconds >= issuedAt { + c.sendErrAndDebug("User Authentication Revoked") + c.closeConnection(Revocation) + continue + } + } } // Check if the signing keys changed, might have to evict @@ -3667,8 +3710,13 @@ func (s *Server) buildInternalAccount(ac *jwt.AccountClaims) *Account { // We don't want to register an account that is in the process of // being built, however, to solve circular import dependencies, we // need to store it here. - s.tmpAccounts.Store(ac.Subject, acc) + if v, loaded := s.tmpAccounts.LoadOrStore(ac.Subject, acc); loaded { + return v.(*Account) + } + + // Update based on claims. s.UpdateAccountClaims(acc, ac) + return acc } @@ -3708,7 +3756,7 @@ func buildPermissionsFromJwt(uc *jwt.Permissions) *Permissions { // Helper to build internal NKeyUser. func buildInternalNkeyUser(uc *jwt.UserClaims, acts map[string]struct{}, acc *Account) *NkeyUser { - nu := &NkeyUser{Nkey: uc.Subject, Account: acc, AllowedConnectionTypes: acts} + nu := &NkeyUser{Nkey: uc.Subject, Account: acc, AllowedConnectionTypes: acts, Issued: uc.IssuedAt} if uc.IssuerAccount != _EMPTY_ { nu.SigningKey = uc.Issuer } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/auth.go b/vendor/github.com/nats-io/nats-server/v2/server/auth.go index 9710634345..716ecbfb4d 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/auth.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/auth.go @@ -60,6 +60,7 @@ type ClientAuthentication interface { // NkeyUser is for multiple nkey based users type NkeyUser struct { Nkey string `json:"user"` + Issued int64 `json:"issued,omitempty"` // this is a copy of the issued at (iat) field in the jwt Permissions *Permissions `json:"permissions,omitempty"` Account *Account `json:"account,omitempty"` SigningKey string `json:"signing_key,omitempty"` diff --git a/vendor/github.com/nats-io/nats-server/v2/server/client.go b/vendor/github.com/nats-io/nats-server/v2/server/client.go index 7171375e44..99134bd0c5 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/client.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/client.go @@ -847,7 +847,7 @@ func (c *client) applyAccountLimits() { c.msubs = jwt.NoLimit if c.opts.JWT != _EMPTY_ { // user jwt implies account if uc, _ := jwt.DecodeUserClaims(c.opts.JWT); uc != nil { - c.mpay = int32(uc.Limits.Payload) + atomic.StoreInt32(&c.mpay, int32(uc.Limits.Payload)) c.msubs = int32(uc.Limits.Subs) if uc.IssuerAccount != _EMPTY_ && uc.IssuerAccount != uc.Issuer { if scope, ok := c.acc.signingKeys[uc.Issuer]; ok { @@ -2914,8 +2914,11 @@ func (c *client) addShadowSubscriptions(acc *Account, sub *subscription, enact b // Add in the shadow subscription. 
func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscription, error) { - im := ime.im + c.mu.Lock() nsub := *sub // copy + c.mu.Unlock() + + im := ime.im nsub.im = im if !im.usePub && ime.dyn && im.tr != nil { @@ -2950,8 +2953,10 @@ func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscri return nil, fmt.Errorf(errs) } - // Update our route map here. - c.srv.updateRemoteSubscription(im.acc, &nsub, 1) + // Update our route map here. But only if we are not a leaf node or a hub leafnode. + if c.kind != LEAF || c.isHubLeafNode() { + c.srv.updateRemoteSubscription(im.acc, &nsub, 1) + } return &nsub, nil } @@ -5228,48 +5233,51 @@ func (c *client) closeConnection(reason ClosedState) { // Unregister srv.removeClient(c) - // Update remote subscriptions. - if acc != nil && (kind == CLIENT || kind == LEAF || kind == JETSTREAM) { - qsubs := map[string]*qsub{} - for _, sub := range subs { - // Call unsubscribe here to cleanup shadow subscriptions and such. - c.unsubscribe(acc, sub, true, false) - // Update route as normal for a normal subscriber. - if sub.queue == nil { - if !spoke { - srv.updateRouteSubscriptionMap(acc, sub, -1) - if srv.gateway.enabled { - srv.gatewayUpdateSubInterest(acc.Name, sub, -1) + if acc != nil { + // Update remote subscriptions. + if kind == CLIENT || kind == LEAF || kind == JETSTREAM { + qsubs := map[string]*qsub{} + for _, sub := range subs { + // Call unsubscribe here to cleanup shadow subscriptions and such. + c.unsubscribe(acc, sub, true, false) + // Update route as normal for a normal subscriber. + if sub.queue == nil { + if !spoke { + srv.updateRouteSubscriptionMap(acc, sub, -1) + if srv.gateway.enabled { + srv.gatewayUpdateSubInterest(acc.Name, sub, -1) + } + } + acc.updateLeafNodes(sub, -1) + } else { + // We handle queue subscribers special in case we + // have a bunch we can just send one update to the + // connected routes. + num := int32(1) + if kind == LEAF { + num = sub.qw + } + key := keyFromSub(sub) + if esub, ok := qsubs[key]; ok { + esub.n += num + } else { + qsubs[key] = &qsub{sub, num} } } - acc.updateLeafNodes(sub, -1) - } else { - // We handle queue subscribers special in case we - // have a bunch we can just send one update to the - // connected routes. - num := int32(1) - if kind == LEAF { - num = sub.qw - } - // TODO(dlc) - Better to use string builder? - key := bytesToString(sub.subject) + " " + bytesToString(sub.queue) - if esub, ok := qsubs[key]; ok { - esub.n += num - } else { - qsubs[key] = &qsub{sub, num} + } + // Process any qsubs here. + for _, esub := range qsubs { + if !spoke { + srv.updateRouteSubscriptionMap(acc, esub.sub, -(esub.n)) + if srv.gateway.enabled { + srv.gatewayUpdateSubInterest(acc.Name, esub.sub, -(esub.n)) + } } + acc.updateLeafNodes(esub.sub, -(esub.n)) } } - // Process any qsubs here. - for _, esub := range qsubs { - if !spoke { - srv.updateRouteSubscriptionMap(acc, esub.sub, -(esub.n)) - if srv.gateway.enabled { - srv.gatewayUpdateSubInterest(acc.Name, esub.sub, -(esub.n)) - } - } - acc.updateLeafNodes(esub.sub, -(esub.n)) - } + // Always remove from the account, otherwise we can leak clients. + // Note that SYSTEM and ACCOUNT types from above cleanup their own subs. 
if prev := acc.removeClient(c); prev == 1 { srv.decActiveAccounts() } @@ -5419,7 +5427,7 @@ func (c *client) getAccAndResultFromCache() (*Account, *SublistResult) { if genid := atomic.LoadUint64(&sl.genid); genid != pac.genid { ok = false - delete(c.in.pacache, bytesToString(c.pa.pacache)) + c.in.pacache = make(map[string]*perAccountCache) } else { acc = pac.acc r = pac.results diff --git a/vendor/github.com/nats-io/nats-server/v2/server/const.go b/vendor/github.com/nats-io/nats-server/v2/server/const.go index 0e8540f091..0a60902657 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/const.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/const.go @@ -14,6 +14,7 @@ package server import ( + "runtime/debug" "time" ) @@ -33,15 +34,28 @@ const ( ) var ( - // gitCommit injected at build - gitCommit string + // gitCommit and serverVersion injected at build. + gitCommit, serverVersion string // trustedKeys is a whitespace separated array of trusted operator's public nkeys. trustedKeys string ) +func init() { + // Use build info if present, it would be if building using 'go build .' + // or when using a release. + if info, ok := debug.ReadBuildInfo(); ok { + for _, setting := range info.Settings { + switch setting.Key { + case "vcs.revision": + gitCommit = setting.Value[:7] + } + } + } +} + const ( // VERSION is the current version for the server. - VERSION = "2.10.16" + VERSION = "2.10.18" // PROTO is the currently supported protocol. // 0 was the original diff --git a/vendor/github.com/nats-io/nats-server/v2/server/consumer.go b/vendor/github.com/nats-io/nats-server/v2/server/consumer.go index 34cac17007..f797073275 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/consumer.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/consumer.go @@ -711,7 +711,7 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri } mset.mu.RLock() - s, jsa, tierName, cfg, acc := mset.srv, mset.jsa, mset.tier, mset.cfg, mset.acc + s, jsa, cfg, acc := mset.srv, mset.jsa, mset.cfg, mset.acc retention := cfg.Retention mset.mu.RUnlock() @@ -726,10 +726,8 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri return nil, NewJSConsumerConfigRequiredError() } - jsa.usageMu.RLock() - selectedLimits, limitsFound := jsa.limits[tierName] - jsa.usageMu.RUnlock() - if !limitsFound { + selectedLimits, _, _, _ := acc.selectLimits(config.replicas(&cfg)) + if selectedLimits == nil { return nil, NewJSNoLimitsError() } @@ -737,10 +735,10 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri // Make sure we have sane defaults. Do so with the JS lock, otherwise a // badly timed meta snapshot can result in a race condition. 
mset.js.mu.Lock() - setConsumerConfigDefaults(config, &mset.cfg, srvLim, &selectedLimits) + setConsumerConfigDefaults(config, &mset.cfg, srvLim, selectedLimits) mset.js.mu.Unlock() - if err := checkConsumerCfg(config, srvLim, &cfg, acc, &selectedLimits, isRecovering); err != nil { + if err := checkConsumerCfg(config, srvLim, &cfg, acc, selectedLimits, isRecovering); err != nil { return nil, err } sampleFreq := 0 @@ -2111,19 +2109,17 @@ func (o *consumer) loopAndForwardProposals(qch chan struct{}) { const maxBatch = 256 * 1024 var entries []*Entry for sz := 0; proposal != nil; proposal = proposal.next { - entry := entryPool.Get().(*Entry) - entry.Type, entry.Data = EntryNormal, proposal.data - entries = append(entries, entry) + entries = append(entries, newEntry(EntryNormal, proposal.data)) sz += len(proposal.data) if sz > maxBatch { - node.ProposeDirect(entries) + node.ProposeMulti(entries) // We need to re-create `entries` because there is a reference // to it in the node's pae map. sz, entries = 0, nil } } if len(entries) > 0 { - node.ProposeDirect(entries) + node.ProposeMulti(entries) } return nil } @@ -2146,22 +2142,18 @@ func (o *consumer) loopAndForwardProposals(qch chan struct{}) { // Lock should be held. func (o *consumer) propose(entry []byte) { - var notify bool p := &proposal{data: entry} if o.phead == nil { o.phead = p - notify = true } else { o.ptail.next = p } o.ptail = p - // Kick our looper routine if needed. - if notify { - select { - case o.pch <- struct{}{}: - default: - } + // Kick our looper routine. + select { + case o.pch <- struct{}{}: + default: } } @@ -2633,17 +2625,24 @@ func (o *consumer) infoWithSnapAndReply(snap bool, reply string) *ConsumerInfo { TimeStamp: time.Now().UTC(), } - // If we are replicated and we are not the leader we need to pull certain data from our store. - if rg != nil && rg.node != nil && !o.isLeader() && o.store != nil { + // If we are replicated and we are not the leader or we are filtered, we need to pull certain data from our store. + isLeader := o.isLeader() + if rg != nil && rg.node != nil && o.store != nil && (!isLeader || o.isFiltered()) { state, err := o.store.BorrowState() if err != nil { o.mu.Unlock() return nil } - info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream - info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream - info.NumAckPending = len(state.Pending) - info.NumRedelivered = len(state.Redelivered) + if !isLeader { + info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream + info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream + info.NumAckPending = len(state.Pending) + info.NumRedelivered = len(state.Redelivered) + } else { + // Since we are filtered and we are the leader we could have o.sseq that is skipped ahead. + // To maintain consistency in reporting (e.g. jsz) we take the state for our delivered stream sequence. + info.Delivered.Stream = state.Delivered.Stream + } } // Adjust active based on non-zero etc. Also make UTC here. @@ -2742,6 +2741,12 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b return } + // Check if this ack is above the current pointer to our next to deliver. + // This could happen on a cooperative takeover with high speed deliveries. 
+ if sseq >= o.sseq { + o.sseq = sseq + 1 + } + mset := o.mset if mset == nil || mset.closed.Load() { o.mu.Unlock() @@ -2763,8 +2768,12 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b delete(o.pending, sseq) // Use the original deliver sequence from our pending record. dseq = p.Sequence + // Only move floors if we matched an existing pending. - if dseq == o.adflr+1 { + if len(o.pending) == 0 { + o.adflr = o.dseq - 1 + o.asflr = o.sseq - 1 + } else if dseq == o.adflr+1 { o.adflr, o.asflr = dseq, sseq for ss := sseq + 1; ss < o.sseq; ss++ { if p, ok := o.pending[ss]; ok { @@ -2775,11 +2784,6 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b } } } - // If nothing left set consumer to current delivered. - // Do not update stream. - if len(o.pending) == 0 { - o.adflr = o.dseq - 1 - } } delete(o.rdc, sseq) o.removeFromRedeliverQueue(sseq) @@ -4150,7 +4154,8 @@ func (o *consumer) checkNumPending() uint64 { if o.mset != nil { var state StreamState o.mset.store.FastState(&state) - if o.sseq > state.LastSeq && o.npc != 0 || o.npc > int64(state.Msgs) { + npc := o.numPending() + if o.sseq > state.LastSeq && npc > 0 || npc > state.Msgs { // Re-calculate. o.streamNumPending() } @@ -4318,7 +4323,7 @@ func (o *consumer) deliverMsg(dsubj, ackReply string, pmsg *jsPubMsg, dc uint64, // If we are ack none and mset is interest only we should make sure stream removes interest. if ap == AckNone && rp != LimitsPolicy { - if o.node == nil || o.cfg.Direct { + if mset != nil && mset.ackq != nil && (o.node == nil || o.cfg.Direct) { mset.ackq.push(seq) } else { o.updateAcks(dseq, seq, _EMPTY_) @@ -5218,18 +5223,19 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error { // ignoreInterest marks whether the consumer should be ignored when determining interest. // No lock held on entry. func (o *consumer) cleanupNoInterestMessages(mset *stream, ignoreInterest bool) { - state := mset.state() - stop := state.LastSeq o.mu.Lock() if !o.isLeader() { - o.readStoredState(stop) + o.readStoredState(0) } start := o.asflr o.mu.Unlock() + // Make sure we start at worst with first sequence in the stream. + state := mset.state() if start < state.FirstSeq { start = state.FirstSeq } + stop := state.LastSeq // Consumer's interests are ignored by default. If we should not ignore interest, unset. co := o @@ -5238,13 +5244,37 @@ func (o *consumer) cleanupNoInterestMessages(mset *stream, ignoreInterest bool) } var rmseqs []uint64 - mset.mu.Lock() + mset.mu.RLock() + + // If over this amount of messages to check, defer to checkInterestState() which + // will do the right thing since we are now removed. + // TODO(dlc) - Better way? + const bailThresh = 100_000 + + // Check if we would be spending too much time here and defer to separate go routine. + if len(mset.consumers) == 0 { + mset.mu.RUnlock() + mset.mu.Lock() + defer mset.mu.Unlock() + mset.store.Purge() + var state StreamState + mset.store.FastState(&state) + mset.lseq = state.LastSeq + // Also make sure we clear any pending acks. + mset.clearAllPreAcksBelowFloor(state.FirstSeq) + return + } else if stop-start > bailThresh { + mset.mu.RUnlock() + go mset.checkInterestState() + return + } + for seq := start; seq <= stop; seq++ { if mset.noInterest(seq, co) { rmseqs = append(rmseqs, seq) } } - mset.mu.Unlock() + mset.mu.RUnlock() // These can be removed. 
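// A hedged, simplified model of the ack-floor bookkeeping changed in
// processAckMsg above: acking the last pending message snaps both floors to
// just below the next-to-deliver pointers; a contiguous ack advances them.
// The real code also walks forward over already-acked gaps, omitted here.
package main

import "fmt"

type consumerState struct {
	pending      map[uint64]uint64 // stream seq -> original delivery seq
	dseq, sseq   uint64            // next delivery / next stream sequence
	adflr, asflr uint64            // ack floors (delivery / stream)
}

func (c *consumerState) ack(sseq uint64) {
	dseq, ok := c.pending[sseq]
	if !ok {
		return
	}
	delete(c.pending, sseq)
	if len(c.pending) == 0 {
		// Nothing outstanding: snap both floors.
		c.adflr, c.asflr = c.dseq-1, c.sseq-1
	} else if dseq == c.adflr+1 {
		// Contiguous ack: advance the floors.
		c.adflr, c.asflr = dseq, sseq
	}
}

func main() {
	c := &consumerState{pending: map[uint64]uint64{1: 1, 2: 2}, dseq: 3, sseq: 3}
	c.ack(1)
	c.ack(2)
	fmt.Println(c.adflr, c.asflr) // 2 2
}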
for _, seq := range rmseqs { @@ -5478,10 +5508,13 @@ func (o *consumer) checkStateForInterestStream() error { o.mu.RUnlock() // If we have pending, we will need to walk through to delivered in case we missed any of those acks as well. - if state != nil && len(state.Pending) > 0 { + if state != nil && len(state.Pending) > 0 && state.AckFloor.Stream > 0 { for seq := state.AckFloor.Stream + 1; seq <= state.Delivered.Stream; seq++ { if _, ok := state.Pending[seq]; !ok { - mset.ackMsg(o, seq) + // Want to call needAck since it is filter aware. + if o.needAck(seq, _EMPTY_) { + mset.ackMsg(o, seq) + } } } } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/events.go b/vendor/github.com/nats-io/nats-server/v2/server/events.go index d165b9ef20..2ebfc5ebac 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/events.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/events.go @@ -98,6 +98,12 @@ const ( // FIXME(dlc) - make configurable. var eventsHBInterval = 30 * time.Second +// Default minimum wait time for sending statsz +const defaultStatszRateLimit = 1 * time.Second + +// Variable version so we can set in tests. +var statszRateLimit = defaultStatszRateLimit + type sysMsgHandler func(sub *subscription, client *client, acc *Account, subject, reply string, hdr, msg []byte) // Used if we have to queue things internally to avoid the route/gw path. @@ -134,6 +140,7 @@ type internal struct { shash string inboxPre string remoteStatsSub *subscription + lastStatsz time.Time } // ServerStatsMsg is sent periodically with stats updates. @@ -807,6 +814,10 @@ func (s *Server) sendStatsz(subj string) { var m ServerStatsMsg s.updateServerUsage(&m.Stats) + if s.limitStatsz(subj) { + return + } + s.mu.RLock() defer s.mu.RUnlock() @@ -948,6 +959,35 @@ func (s *Server) sendStatsz(subj string) { s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) } +// Limit updates to the heartbeat interval, max one second by default. +func (s *Server) limitStatsz(subj string) bool { + s.mu.Lock() + defer s.mu.Unlock() + + if s.sys == nil { + return true + } + + // Only limit the normal broadcast subject. + if subj != fmt.Sprintf(serverStatsSubj, s.ID()) { + return false + } + + interval := statszRateLimit + if s.sys.cstatsz < interval { + interval = s.sys.cstatsz + } + if time.Since(s.sys.lastStatsz) < interval { + // Reschedule heartbeat for the next interval. + if s.sys.stmr != nil { + s.sys.stmr.Reset(time.Until(s.sys.lastStatsz.Add(interval))) + } + return true + } + s.sys.lastStatsz = time.Now() + return false +} + // Send out our statz update. // This should be wrapChk() to setup common locking. func (s *Server) heartbeatStatsz() { @@ -965,6 +1005,12 @@ func (s *Server) heartbeatStatsz() { go s.sendStatszUpdate() } +// Reset statsz rate limit for the next broadcast. +// This should be wrapChk() to setup common locking. +func (s *Server) resetLastStatsz() { + s.sys.lastStatsz = time.Time{} +} + func (s *Server) sendStatszUpdate() { s.sendStatsz(fmt.Sprintf(serverStatsSubj, s.ID())) } @@ -1019,44 +1065,56 @@ func (s *Server) Node() string { // Tradeoff is subscription and interest graph events vs connect and // disconnect events, etc. func (s *Server) initEventTracking() { - if !s.EventsEnabled() { + // Capture sys in case we are shutdown while setting up. + s.mu.RLock() + sys := s.sys + s.mu.RUnlock() + + if sys == nil || sys.client == nil || sys.account == nil { return } // Create a system hash which we use for other servers to target us specifically. 
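// A hedged sketch of the statsz rate limit introduced in events.go above:
// drop a broadcast if one was sent within the window, and reschedule the
// heartbeat timer to fire exactly when the window reopens. Field names are
// illustrative; the server keeps this state on its internal sys struct.
package main

import (
	"fmt"
	"time"
)

type statszLimiter struct {
	last     time.Time
	interval time.Duration
	timer    *time.Timer
}

func (l *statszLimiter) allow() bool {
	if time.Since(l.last) < l.interval {
		if l.timer != nil {
			// Fire again when the current window expires.
			l.timer.Reset(time.Until(l.last.Add(l.interval)))
		}
		return false
	}
	l.last = time.Now()
	return true
}

func main() {
	l := &statszLimiter{interval: time.Second, timer: time.NewTimer(time.Second)}
	fmt.Println(l.allow(), l.allow()) // true false
}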
- s.sys.shash = getHash(s.info.Name) + sys.shash = getHash(s.info.Name) // This will be for all inbox responses. - subject := fmt.Sprintf(inboxRespSubj, s.sys.shash, "*") + subject := fmt.Sprintf(inboxRespSubj, sys.shash, "*") if _, err := s.sysSubscribe(subject, s.inboxReply); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } - s.sys.inboxPre = subject + sys.inboxPre = subject // This is for remote updates for connection accounting. subject = fmt.Sprintf(accConnsEventSubjOld, "*") if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil { s.Errorf("Error setting up internal tracking for %s: %v", subject, err) + return } // This will be for responses for account info that we send out. subject = fmt.Sprintf(connsRespSubj, s.info.ID) if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // Listen for broad requests to respond with number of subscriptions for a given subject. if _, err := s.sysSubscribe(accNumSubsReqSubj, s.noInlineCallback(s.nsubsRequest)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // Listen for statsz from others. subject = fmt.Sprintf(serverStatsSubj, "*") if sub, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerUpdate)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } else { // Keep track of this one. - s.sys.remoteStatsSub = sub + sys.remoteStatsSub = sub } + // Listen for all server shutdowns. subject = fmt.Sprintf(shutdownEventSubj, "*") if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // Listen for servers entering lame-duck mode. // NOTE: This currently is handled in the same way as a server shutdown, but has @@ -1064,6 +1122,7 @@ func (s *Server) initEventTracking() { subject = fmt.Sprintf(lameDuckEventSubj, "*") if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // Listen for account claims updates. subscribeToUpdate := true @@ -1074,6 +1133,7 @@ func (s *Server) initEventTracking() { for _, sub := range []string{accUpdateEventSubjOld, accUpdateEventSubjNew} { if _, err := s.sysSubscribe(fmt.Sprintf(sub, "*"), s.noInlineCallback(s.accountClaimUpdate)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } } } @@ -1081,6 +1141,7 @@ func (s *Server) initEventTracking() { // This subscription is kept for backwards compatibility. 
Got replaced by ...PING.STATZ from below if _, err := s.sysSubscribe(serverStatsPingReqSubj, s.noInlineCallback(s.statszReq)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } monSrvc := map[string]sysMsgHandler{ "IDZ": s.idzReq, @@ -1134,10 +1195,12 @@ func (s *Server) initEventTracking() { subject = fmt.Sprintf(serverDirectReqSubj, s.info.ID, name) if _, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } subject = fmt.Sprintf(serverPingReqSubj, name) if _, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } } extractAccount := func(subject string) (string, error) { @@ -1230,6 +1293,7 @@ func (s *Server) initEventTracking() { for name, req := range monAccSrvc { if _, err := s.sysSubscribe(fmt.Sprintf(accDirectReqSubj, "*", name), s.noInlineCallback(req)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } } @@ -1238,6 +1302,7 @@ func (s *Server) initEventTracking() { // is only one that will answer. This breaks tests since we still forward on remote server connect. if _, err := s.sysSubscribe(fmt.Sprintf(userDirectReqSubj, "*"), s.userInfoReq); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // For now only the STATZ subject has an account specific ping equivalent. @@ -1255,6 +1320,7 @@ func (s *Server) initEventTracking() { }) })); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // Listen for updates when leaf nodes connect for a given account. This will @@ -1262,32 +1328,38 @@ func (s *Server) initEventTracking() { subject = fmt.Sprintf(leafNodeConnectEventSubj, "*") if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.leafNodeConnected)); err != nil { s.Errorf("Error setting up internal tracking: %v", err) + return } // For tracking remote latency measurements. - subject = fmt.Sprintf(remoteLatencyEventSubj, s.sys.shash) + subject = fmt.Sprintf(remoteLatencyEventSubj, sys.shash) if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteLatencyUpdate)); err != nil { s.Errorf("Error setting up internal latency tracking: %v", err) + return } // This is for simple debugging of number of subscribers that exist in the system. if _, err := s.sysSubscribeInternal(accSubsSubj, s.noInlineCallback(s.debugSubscribers)); err != nil { s.Errorf("Error setting up internal debug service for subscribers: %v", err) + return } // Listen for requests to reload the server configuration. subject = fmt.Sprintf(serverReloadReqSubj, s.info.ID) if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.reloadConfig)); err != nil { s.Errorf("Error setting up server reload handler: %v", err) + return } // Client connection kick subject = fmt.Sprintf(clientKickReqSubj, s.info.ID) if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.kickClient)); err != nil { s.Errorf("Error setting up client kick service: %v", err) + return } // Client connection LDM subject = fmt.Sprintf(clientLDMReqSubj, s.info.ID) if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.ldmClient)); err != nil { s.Errorf("Error setting up client LDM service: %v", err) + return } } @@ -1868,6 +1940,7 @@ func (s *Server) statszReq(sub *subscription, c *client, _ *Account, subject, re // No reply is a signal that we should use our normal broadcast subject. 
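// A hedged sketch of the fail-fast setup pattern these events.go hunks apply:
// every internal subscription now aborts initEventTracking on error rather
// than continuing with partially wired tracking. The same idea, table-driven
// (subscribe here is a stand-in for sysSubscribe):
package main

import (
	"errors"
	"fmt"
)

func subscribe(subject string) error {
	if subject == "" {
		return errors.New("empty subject")
	}
	return nil
}

func initTracking(subjects []string) error {
	for _, subj := range subjects {
		if err := subscribe(subj); err != nil {
			// Abort instead of limping along with partial state.
			return fmt.Errorf("setting up tracking for %q: %w", subj, err)
		}
	}
	return nil
}

func main() { fmt.Println(initTracking([]string{"$SYS.REQ.SERVER.PING", ""})) }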
if reply == _EMPTY_ { reply = fmt.Sprintf(serverStatsSubj, s.info.ID) + s.wrapChk(s.resetLastStatsz) } opts := StatszEventOptions{} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/filestore.go b/vendor/github.com/nats-io/nats-server/v2/server/filestore.go index 188de4aca5..e059d28bec 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/filestore.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/filestore.go @@ -214,7 +214,7 @@ type msgBlock struct { bytes uint64 // User visible bytes count. rbytes uint64 // Total bytes (raw) including deleted. Used for rolling to new blk. msgs uint64 // User visible message count. - fss map[string]*SimpleState + fss *stree.SubjectTree[SimpleState] kfn string lwts int64 llts int64 @@ -295,13 +295,13 @@ const ( // Maximum size of a write buffer we may consider for re-use. maxBufReuse = 2 * 1024 * 1024 // default cache buffer expiration - defaultCacheBufferExpiration = 2 * time.Second + defaultCacheBufferExpiration = 10 * time.Second // default sync interval defaultSyncInterval = 2 * time.Minute // default idle timeout to close FDs. closeFDsIdle = 30 * time.Second // default expiration time for mb.fss when idle. - defaultFssExpiration = 10 * time.Second + defaultFssExpiration = 2 * time.Minute // coalesceMinimum coalesceMinimum = 16 * 1024 // maxFlushWait is maximum we will wait to gather messages to flush. @@ -1869,7 +1869,7 @@ func (mb *msgBlock) lastChecksum() []byte { mb.rbytes = uint64(fi.Size()) } if mb.rbytes < checksumSize { - return nil + return lchk[:] } // Encrypted? // Check for encryption, we do not load keys on startup anymore so might need to load them here. @@ -2063,11 +2063,13 @@ func (fs *fileStore) expireMsgsOnRecover() { } // Make sure we do subject cleanup as well. mb.ensurePerSubjectInfoLoaded() - for subj, ss := range mb.fss { + mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool { + subj := bytesToString(bsubj) for i := uint64(0); i < ss.Msgs; i++ { fs.removePerSubject(subj) } - } + return true + }) mb.dirtyCloseWithRemove(true) deleted++ } @@ -2314,9 +2316,21 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor // Mark fss activity. mb.lsts = time.Now().UnixNano() + if filter == _EMPTY_ { + filter = fwcs + wc = true + } + // If we only have 1 subject currently and it matches our filter we can also set isAll. - if !isAll && len(mb.fss) == 1 { - _, isAll = mb.fss[filter] + if !isAll && mb.fss.Size() == 1 { + if !wc { + _, isAll = mb.fss.Find(stringToBytes(filter)) + } else { + // Since mb.fss.Find won't work if filter is a wildcard, need to use Match instead. + mb.fss.Match(stringToBytes(filter), func(subject []byte, _ *SimpleState) { + isAll = true + }) + } } // Make sure to start at mb.first.seq if fseq < mb.first.seq if seq := atomic.LoadUint64(&mb.first.seq); seq > fseq { @@ -2325,16 +2339,15 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor lseq := atomic.LoadUint64(&mb.last.seq) // Optionally build the isMatch for wildcard filters. - tsa := [32]string{} - fsa := [32]string{} - var fts []string + _tsa, _fsa := [32]string{}, [32]string{} + tsa, fsa := _tsa[:0], _fsa[:0] var isMatch func(subj string) bool // Decide to build. 
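// A hedged illustration of the data-structure swap running through the
// filestore hunks: per-block fss moves from map[string]*SimpleState to
// stree.SubjectTree[SimpleState], a subject-aware radix tree. That package
// is internal to nats-server, so this map-backed stand-in only mirrors the
// Find/Insert/Size/Iter call shape the diff depends on.
package main

import "fmt"

type SimpleState struct{ Msgs, First, Last uint64 }

type subjectTree struct{ m map[string]*SimpleState }

func newSubjectTree() *subjectTree { return &subjectTree{m: make(map[string]*SimpleState)} }

func (t *subjectTree) Insert(subj []byte, ss SimpleState) { t.m[string(subj)] = &ss }

func (t *subjectTree) Find(subj []byte) (*SimpleState, bool) {
	ss, ok := t.m[string(subj)]
	return ss, ok
}

func (t *subjectTree) Size() int { return len(t.m) }

// Iter visits every entry; returning false from the callback stops the walk.
func (t *subjectTree) Iter(f func(subj []byte, ss *SimpleState) bool) {
	for subj, ss := range t.m {
		if !f([]byte(subj), ss) {
			return
		}
	}
}

func main() {
	fss := newSubjectTree()
	fss.Insert([]byte("orders.new"), SimpleState{Msgs: 1, First: 1, Last: 1})
	if ss, ok := fss.Find([]byte("orders.new")); ok {
		ss.Msgs++ // Find returns a pointer, so updates mutate in place.
	}
	fmt.Println(fss.Size())
}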
if wc { - fts = tokenizeSubjectIntoSlice(fsa[:0], filter) + fsa = tokenizeSubjectIntoSlice(fsa[:0], filter) isMatch = func(subj string) bool { - tts := tokenizeSubjectIntoSlice(tsa[:0], subj) - return isSubsetMatchTokenized(tts, fts) + tsa = tokenizeSubjectIntoSlice(tsa[:0], subj) + return isSubsetMatchTokenized(tsa, fsa) } } @@ -2344,19 +2357,18 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor // If we do not think we should do a linear scan check how many fss we // would need to scan vs the full range of the linear walk. Optimize for // 25th quantile of a match in a linear walk. Filter should be a wildcard. - if !doLinearScan && wc { - doLinearScan = len(mb.fss)*4 > int(lseq-fseq) + // We should consult fss if our cache is not loaded and we only have fss loaded. + if !doLinearScan && wc && mb.cacheAlreadyLoaded() { + doLinearScan = mb.fss.Size()*4 > int(lseq-fseq) } if !doLinearScan { // If we have a wildcard match against all tracked subjects we know about. if wc { subs = subs[:0] - for subj := range mb.fss { - if isMatch(subj) { - subs = append(subs, subj) - } - } + mb.fss.Match(stringToBytes(filter), func(bsubj []byte, _ *SimpleState) { + subs = append(subs, string(bsubj)) + }) // Check if we matched anything if len(subs) == 0 { return nil, didLoad, ErrStoreMsgNotFound @@ -2364,7 +2376,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor } fseq = lseq + 1 for _, subj := range subs { - ss := mb.fss[subj] + ss, _ := mb.fss.Find(stringToBytes(subj)) if ss != nil && ss.firstNeedsUpdate { mb.recalculateFirstForSubj(subj, ss.First, ss) } @@ -2455,6 +2467,11 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) ( } } + if filter == _EMPTY_ { + filter = fwcs + wc = true + } + update := func(ss *SimpleState) { total += ss.Msgs if first == 0 || ss.First < first { @@ -2468,9 +2485,9 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) ( // Make sure we have fss loaded. mb.ensurePerSubjectInfoLoaded() - tsa := [32]string{} - fsa := [32]string{} - fts := tokenizeSubjectIntoSlice(fsa[:0], filter) + _tsa, _fsa := [32]string{}, [32]string{} + tsa, fsa := _tsa[:0], _fsa[:0] + fsa = tokenizeSubjectIntoSlice(fsa[:0], filter) // 1. See if we match any subs from fss. // 2. If we match and the sseq is past ss.Last then we can use meta only. @@ -2480,25 +2497,26 @@ func (mb *msgBlock) filteredPendingLocked(filter string, wc bool, sseq uint64) ( if !wc { return subj == filter } - tts := tokenizeSubjectIntoSlice(tsa[:0], subj) - return isSubsetMatchTokenized(tts, fts) + tsa = tokenizeSubjectIntoSlice(tsa[:0], subj) + return isSubsetMatchTokenized(tsa, fsa) } var havePartial bool - for subj, ss := range mb.fss { - if isAll || isMatch(subj) { - if ss.firstNeedsUpdate { - mb.recalculateFirstForSubj(subj, ss.First, ss) - } - if sseq <= ss.First { - update(ss) - } else if sseq <= ss.Last { - // We matched but its a partial. - havePartial = true - break - } + mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) { + if havePartial { + // If we already found a partial then don't do anything else. + return } - } + if ss.firstNeedsUpdate { + mb.recalculateFirstForSubj(bytesToString(bsubj), ss.First, ss) + } + if sseq <= ss.First { + update(ss) + } else if sseq <= ss.Last { + // We matched but its a partial. + havePartial = true + } + }) // If we did not encounter any partials we can return here. 
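// A hedged sketch of the tokenize-and-match pattern these hunks tighten:
// split filter and subject once into stack-backed slices, then compare token
// by token under the NATS wildcards '*' (one token) and '>' (one-or-more
// trailing tokens). This is a simplified reading of isSubsetMatchTokenized.
package main

import (
	"fmt"
	"strings"
)

func tokenize(dst []string, subj string) []string {
	return append(dst, strings.Split(subj, ".")...)
}

func isSubsetMatchTokenized(subj, filter []string) bool {
	for i, t := range filter {
		if t == ">" && i == len(filter)-1 {
			return len(subj) > i // '>' must cover at least one token
		}
		if i >= len(subj) || (t != "*" && t != subj[i]) {
			return false
		}
	}
	return len(subj) == len(filter)
}

func main() {
	var tsa, fsa [32]string
	subj := tokenize(tsa[:0], "orders.eu.new")
	filter := tokenize(fsa[:0], "orders.*.new")
	fmt.Println(isSubsetMatchTokenized(subj, filter)) // true
}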
if !havePartial { @@ -2589,9 +2607,85 @@ func (fs *fileStore) FilteredState(sseq uint64, subj string) SimpleState { return ss } +// This is used to see if we can selectively jump start blocks based on filter subject and a floor block index. +// Will return -1 if no matches at all. +func (fs *fileStore) checkSkipFirstBlock(filter string, wc bool) (int, int) { + start, stop := uint32(math.MaxUint32), uint32(0) + if wc { + fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) { + if psi.fblk < start { + start = psi.fblk + } + if psi.lblk > stop { + stop = psi.lblk + } + }) + } else if psi, ok := fs.psim.Find(stringToBytes(filter)); ok { + start, stop = psi.fblk, psi.lblk + } + // Nothing found. + if start == uint32(math.MaxUint32) { + return -1, -1 + } + // Here we need to translate this to index into fs.blks properly. + mb := fs.bim[start] + if mb == nil { + // psim fblk can be lazy. + i := start + 1 + for ; i <= stop; i++ { + mb = fs.bim[i] + if mb == nil { + continue + } + if _, f, _ := mb.filteredPending(filter, wc, 0); f > 0 { + break + } + } + // Update fblk since fblk was outdated. + if !wc { + if psi, ok := fs.psim.Find(stringToBytes(filter)); ok { + psi.fblk = i + } + } else { + fs.psim.Match(stringToBytes(filter), func(subj []byte, psi *psi) { + if i > psi.fblk { + psi.fblk = i + } + }) + } + } + // Still nothing. + if mb == nil { + return -1, -1 + } + // Grab first index. + fi, _ := fs.selectMsgBlockWithIndex(atomic.LoadUint64(&mb.last.seq)) + + // Grab last if applicable. + var li int + if mb = fs.bim[stop]; mb != nil { + li, _ = fs.selectMsgBlockWithIndex(atomic.LoadUint64(&mb.last.seq)) + } + + return fi, li +} + // Optimized way for getting all num pending matching a filter subject. // Lock should be held. func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) { + fs.numFilteredPendingWithLast(filter, true, ss) +} + +// Optimized way for getting all num pending matching a filter subject and first sequence only. +// Lock should be held. +func (fs *fileStore) numFilteredPendingNoLast(filter string, ss *SimpleState) { + fs.numFilteredPendingWithLast(filter, false, ss) +} + +// Optimized way for getting all num pending matching a filter subject. +// Optionally look up last sequence. Sometimes do not need last and this avoids cost. +// Lock should be held. +func (fs *fileStore) numFilteredPendingWithLast(filter string, last bool, ss *SimpleState) { isAll := filter == _EMPTY_ || filter == fwcs // If isAll we do not need to do anything special to calculate the first and last and total. @@ -2601,29 +2695,52 @@ func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) { ss.Msgs = fs.state.Msgs return } + // Always reset. + ss.First, ss.Last, ss.Msgs = 0, 0, 0 + + if filter == _EMPTY_ { + filter = fwcs + } - start, stop := uint32(math.MaxUint32), uint32(0) - fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) { - ss.Msgs += psi.total - // Keep track of start and stop indexes for this subject. - if psi.fblk < start { - start = psi.fblk - } - if psi.lblk > stop { - stop = psi.lblk - } - }) // We do need to figure out the first and last sequences. wc := subjectHasWildcard(filter) + start, stop := uint32(math.MaxUint32), uint32(0) + + if wc { + fs.psim.Match(stringToBytes(filter), func(_ []byte, psi *psi) { + ss.Msgs += psi.total + // Keep track of start and stop indexes for this subject. 
+ if psi.fblk < start { + start = psi.fblk + } + if psi.lblk > stop { + stop = psi.lblk + } + }) + } else if psi, ok := fs.psim.Find(stringToBytes(filter)); ok { + ss.Msgs += psi.total + start, stop = psi.fblk, psi.lblk + } + + // Did not find anything. + if stop == 0 { + return + } + // Do start mb := fs.bim[start] if mb != nil { _, f, _ := mb.filteredPending(filter, wc, 0) ss.First = f } + if ss.First == 0 { - // This is a miss. This can happen since psi.fblk is lazy, but should be very rare. - for i := start + 1; i <= stop; i++ { + // This is a miss. This can happen since psi.fblk is lazy. + // We will make sure to update fblk. + + // Hold this outside loop for psim fblk updates when done. + i := start + 1 + for ; i <= stop; i++ { mb := fs.bim[i] if mb == nil { continue @@ -2633,11 +2750,25 @@ func (fs *fileStore) numFilteredPending(filter string, ss *SimpleState) { break } } + // Update fblk since fblk was outdated. + if !wc { + if info, ok := fs.psim.Find(stringToBytes(filter)); ok { + info.fblk = i + } + } else { + fs.psim.Match(stringToBytes(filter), func(subj []byte, psi *psi) { + if i > psi.fblk { + psi.fblk = i + } + }) + } } - // Now last - if mb = fs.bim[stop]; mb != nil { - _, _, l := mb.filteredPending(filter, wc, 0) - ss.Last = l + // Now gather last sequence if asked to do so. + if last { + if mb = fs.bim[stop]; mb != nil { + _, _, l := mb.filteredPending(filter, wc, 0) + ss.Last = l + } } } @@ -2650,6 +2781,10 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState { return nil } + if subject == _EMPTY_ { + subject = fwcs + } + start, stop := fs.blks[0], fs.lmb // We can short circuit if not a wildcard using psim for start and stop. if !subjectHasWildcard(subject) { @@ -2657,7 +2792,12 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState { if !ok { return nil } - start, stop = fs.bim[info.fblk], fs.bim[info.lblk] + if f := fs.bim[info.fblk]; f != nil { + start = f + } + if l := fs.bim[info.lblk]; l != nil { + stop = l + } } // Aggregate fss. @@ -2681,21 +2821,20 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState { } // Mark fss activity. mb.lsts = time.Now().UnixNano() - for subj, ss := range mb.fss { - if subject == _EMPTY_ || subject == fwcs || subjectIsSubsetMatch(subj, subject) { - if ss.firstNeedsUpdate { - mb.recalculateFirstForSubj(subj, ss.First, ss) - } - oss := fss[subj] - if oss.First == 0 { // New - fss[subj] = *ss - } else { - // Merge here. - oss.Last, oss.Msgs = ss.Last, oss.Msgs+ss.Msgs - fss[subj] = oss - } + mb.fss.Match(stringToBytes(subject), func(bsubj []byte, ss *SimpleState) { + subj := string(bsubj) + if ss.firstNeedsUpdate { + mb.recalculateFirstForSubj(subj, ss.First, ss) } - } + oss := fss[subj] + if oss.First == 0 { // New + fss[subj] = *ss + } else { + // Merge here. + oss.Last, oss.Msgs = ss.Last, oss.Msgs+ss.Msgs + fss[subj] = oss + } + }) if shouldExpire { // Expire this cache before moving on. mb.tryForceExpireCacheLocked() @@ -2723,6 +2862,10 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) return 0, validThrough } + // If sseq is less then our first set to first. + if sseq < fs.state.FirstSeq { + sseq = fs.state.FirstSeq + } // Track starting for both block for the sseq and staring block that matches any subject. var seqStart int // See if we need to figure out starting block per sseq. 
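// A hedged model of the "lazy fblk" repair added above: the per-subject index
// (psim) keeps a first-block hint that can go stale once older blocks no
// longer hold the subject, so a miss scans forward and writes the corrected
// hint back. blockHolds stands in for mb.filteredPending finding a match.
package main

import "fmt"

type psi struct{ fblk, lblk uint32 }

func firstMatchingBlock(blockHolds map[uint32]bool, info *psi) (uint32, bool) {
	for i := info.fblk; i <= info.lblk; i++ {
		if blockHolds[i] {
			info.fblk = i // repair the stale hint for the next lookup
			return i, true
		}
	}
	return 0, false
}

func main() {
	blocks := map[uint32]bool{4: true} // blocks 1-3 no longer hold the subject
	info := &psi{fblk: 1, lblk: 5}
	blk, ok := firstMatchingBlock(blocks, info)
	fmt.Println(blk, ok, info.fblk) // 4 true 4
}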
@@ -2734,16 +2877,14 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) } isAll := filter == _EMPTY_ || filter == fwcs + if isAll && filter == _EMPTY_ { + filter = fwcs + } wc := subjectHasWildcard(filter) // See if filter was provided but its the only subject. if !isAll && !wc && fs.psim.Size() == 1 { - if _, ok := fs.psim.Find(stringToBytes(filter)); ok { - isAll = true - } - } - if isAll && filter == _EMPTY_ { - filter = fwcs + _, isAll = fs.psim.Find(stringToBytes(filter)) } // If we are isAll and have no deleted we can do a simpler calculation. if !lastPerSubject && isAll && (fs.state.LastSeq-fs.state.FirstSeq+1) == fs.state.Msgs { @@ -2753,8 +2894,9 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) return fs.state.LastSeq - sseq + 1, validThrough } - var tsa, fsa [32]string - fts := tokenizeSubjectIntoSlice(fsa[:0], filter) + _tsa, _fsa := [32]string{}, [32]string{} + tsa, fsa := _tsa[:0], _fsa[:0] + fsa = tokenizeSubjectIntoSlice(fsa[:0], filter) isMatch := func(subj string) bool { if isAll { @@ -2763,8 +2905,8 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) if !wc { return subj == filter } - tts := tokenizeSubjectIntoSlice(tsa[:0], subj) - return isSubsetMatchTokenized(tts, fts) + tsa = tokenizeSubjectIntoSlice(tsa[:0], subj) + return isSubsetMatchTokenized(tsa, fsa) } // Handle last by subject a bit differently. @@ -2864,20 +3006,22 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) mb.lsts = time.Now().UnixNano() var havePartial bool - for subj, ss := range mb.fss { - if isMatch(subj) { - if ss.firstNeedsUpdate { - mb.recalculateFirstForSubj(subj, ss.First, ss) - } - if sseq <= ss.First { - t += ss.Msgs - } else if sseq <= ss.Last { - // We matched but its a partial. - havePartial = true - break - } + mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) { + if havePartial { + // If we already found a partial then don't do anything else. + return } - } + subj := bytesToString(bsubj) + if ss.firstNeedsUpdate { + mb.recalculateFirstForSubj(subj, ss.First, ss) + } + if sseq <= ss.First { + t += ss.Msgs + } else if sseq <= ss.Last { + // We matched but its a partial. + havePartial = true + } + }) // See if we need to scan msgs here. if havePartial { @@ -2955,11 +3099,9 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool) // Mark fss activity. mb.lsts = time.Now().UnixNano() - for subj, ss := range mb.fss { - if isMatch(subj) { - adjust += ss.Msgs - } - } + mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) { + adjust += ss.Msgs + }) } } else { // This is the last block. We need to scan per message here. @@ -3080,7 +3222,7 @@ func (fs *fileStore) newMsgBlockForWrite() (*msgBlock, error) { // Lock should be held to quiet race detector. mb.mu.Lock() mb.setupWriteCache(rbuf) - mb.fss = make(map[string]*SimpleState) + mb.fss = stree.NewSubjectTree[SimpleState]() // Set cache time to creation time to start. ts := time.Now().UnixNano() @@ -3339,6 +3481,17 @@ func (mb *msgBlock) skipMsg(seq uint64, now time.Time) { mb.last.ts = nowts atomic.StoreUint64(&mb.first.seq, seq+1) mb.first.ts = nowts + needsRecord = mb == mb.fs.lmb + if needsRecord && mb.rbytes > 0 { + // We want to make sure since we have no messages + // that we write to the beginning since we only need last one. + mb.rbytes, mb.cache = 0, &cache{} + // If encrypted we need to reset counter since we just keep one. 
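// A hedged sketch of the NumPending fast path kept above: when the store has
// no interior deletes (count equals the sequence span) and the filter matches
// everything, pending reduces to arithmetic, including the new clamp of the
// start sequence to the store's first sequence.
package main

import "fmt"

func numPendingFast(firstSeq, lastSeq, msgs, sseq uint64) (uint64, bool) {
	if lastSeq-firstSeq+1 != msgs {
		return 0, false // interior deletes: the block-walking slow path applies
	}
	if sseq < firstSeq {
		sseq = firstSeq // clamp, mirroring the hunk above
	}
	if sseq > lastSeq {
		return 0, true
	}
	return lastSeq - sseq + 1, true
}

func main() { fmt.Println(numPendingFast(10, 100, 91, 50)) } // 51 true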
+ if mb.bek != nil { + // Recreate to reset counter. + mb.bek, _ = genBlockEncryptionKey(mb.fs.fcfg.Cipher, mb.seed, mb.nonce) + } + } } else { needsRecord = true mb.dmap.Insert(seq) @@ -3521,10 +3674,11 @@ func (fs *fileStore) firstSeqForSubj(subj string) (uint64, error) { // Mark fss activity. mb.lsts = time.Now().UnixNano() - if ss := mb.fss[subj]; ss != nil { + bsubj := stringToBytes(subj) + if ss, ok := mb.fss.Find(bsubj); ok && ss != nil { // Adjust first if it was not where we thought it should be. if i != start { - if info, ok := fs.psim.Find(stringToBytes(subj)); ok { + if info, ok := fs.psim.Find(bsubj); ok { info.fblk = i } } @@ -3608,11 +3762,12 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) { // collect all that are not correct. needAttention := make(map[string]*psi) - fs.psim.Match([]byte(fwcs), func(subj []byte, psi *psi) { + fs.psim.Iter(func(subj []byte, psi *psi) bool { numMsgs += psi.total if psi.total > maxMsgsPer { needAttention[string(subj)] = psi } + return true }) // We had an issue with a use case where psim (and hence fss) were correct but idx was not and was not properly being caught. @@ -3632,10 +3787,11 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) { fs.rebuildStateLocked(nil) // Need to redo blocks that need attention. needAttention = make(map[string]*psi) - fs.psim.Match([]byte(fwcs), func(subj []byte, psi *psi) { + fs.psim.Iter(func(subj []byte, psi *psi) bool { if psi.total > maxMsgsPer { needAttention[string(subj)] = psi } + return true }) } @@ -3657,8 +3813,8 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) { // Grab the ss entry for this subject in case sparse. mb.mu.Lock() mb.ensurePerSubjectInfoLoaded() - ss := mb.fss[subj] - if ss != nil && ss.firstNeedsUpdate { + ss, ok := mb.fss.Find(stringToBytes(subj)) + if ok && ss != nil && ss.firstNeedsUpdate { mb.recalculateFirstForSubj(subj, ss.First, ss) } mb.mu.Unlock() @@ -4753,11 +4909,11 @@ func (mb *msgBlock) writeMsgRecord(rl, seq uint64, subj string, mhdr, msg []byte } // Mark fss activity. mb.lsts = time.Now().UnixNano() - if ss := mb.fss[subj]; ss != nil { + if ss, ok := mb.fss.Find(stringToBytes(subj)); ok && ss != nil { ss.Msgs++ ss.Last = seq } else { - mb.fss[subj] = &SimpleState{Msgs: 1, First: seq, Last: seq} + mb.fss.Insert(stringToBytes(subj), SimpleState{Msgs: 1, First: seq, Last: seq}) } } @@ -5358,7 +5514,7 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error { // Create FSS if we should track. var popFss bool if mb.fssNotLoaded() { - mb.fss = make(map[string]*SimpleState) + mb.fss = stree.NewSubjectTree[SimpleState]() popFss = true } // Mark fss activity. @@ -5425,15 +5581,15 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error { // Handle FSS inline here. if popFss && slen > 0 && !mb.noTrack && !erased && !mb.dmap.Exists(seq) { bsubj := buf[index+msgHdrSize : index+msgHdrSize+uint32(slen)] - if ss := mb.fss[string(bsubj)]; ss != nil { + if ss, ok := mb.fss.Find(bsubj); ok && ss != nil { ss.Msgs++ ss.Last = seq } else { - mb.fss[string(bsubj)] = &SimpleState{ + mb.fss.Insert(bsubj, SimpleState{ Msgs: 1, First: seq, Last: seq, - } + }) } } } @@ -6105,15 +6261,31 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err return nil, ErrStoreMsgNotFound } - start, stop := fs.lmb.index, fs.blks[0].index wc := subjectHasWildcard(subj) + var start, stop uint32 + // If literal subject check for presence. 
- if !wc { - if info, ok := fs.psim.Find(stringToBytes(subj)); !ok { + if wc { + start = fs.lmb.index + fs.psim.Match(stringToBytes(subj), func(_ []byte, psi *psi) { + // Keep track of start and stop indexes for this subject. + if psi.fblk < start { + start = psi.fblk + } + if psi.lblk > stop { + stop = psi.lblk + } + }) + // None matched. + if stop == 0 { return nil, ErrStoreMsgNotFound - } else { - start, stop = info.lblk, info.fblk } + // These need to be swapped. + start, stop = stop, start + } else if info, ok := fs.psim.Find(stringToBytes(subj)); ok { + start, stop = info.lblk, info.fblk + } else { + return nil, ErrStoreMsgNotFound } // Walk blocks backwards. @@ -6133,7 +6305,7 @@ func (fs *fileStore) loadLast(subj string, sm *StoreMsg) (lsm *StoreMsg, err err var l uint64 // Optimize if subject is not a wildcard. if !wc { - if ss := mb.fss[subj]; ss != nil { + if ss, ok := mb.fss.Find(stringToBytes(subj)); ok && ss != nil { l = ss.Last } } @@ -6227,7 +6399,12 @@ func (fs *fileStore) LoadNextMsg(filter string, wc bool, start uint64, sm *Store // let's check the psim to see if we can skip ahead. if start <= fs.state.FirstSeq { var ss SimpleState - fs.numFilteredPending(filter, &ss) + fs.numFilteredPendingNoLast(filter, &ss) + // Nothing available. + if ss.Msgs == 0 { + return nil, fs.state.LastSeq, ErrStoreEOF + } + // We can skip ahead. if ss.First > start { start = ss.First } @@ -6243,8 +6420,27 @@ func (fs *fileStore) LoadNextMsg(filter string, wc bool, start uint64, sm *Store return sm, sm.seq, nil } else if err != ErrStoreMsgNotFound { return nil, 0, err - } else if expireOk { - mb.tryForceExpireCache() + } else { + // Nothing found in this block. We missed, if first block (bi) check psim. + // Similar to above if start <= first seq. + // TODO(dlc) - For v2 track these by filter subject since they will represent filtered consumers. + if i == bi { + nbi, lbi := fs.checkSkipFirstBlock(filter, wc) + // Nothing available. + if nbi < 0 || lbi <= bi { + return nil, fs.state.LastSeq, ErrStoreEOF + } + // See if we can jump ahead here. + // Right now we can only spin on first, so if we have interior sparseness need to favor checking per block fss if loaded. + // For v2 will track all blocks that have matches for psim. + if nbi > i { + i = nbi - 1 // For the iterator condition i++ + } + } + // Check is we can expire. + if expireOk { + mb.tryForceExpireCache() + } } } } @@ -6824,11 +7020,13 @@ func (fs *fileStore) Compact(seq uint64) (uint64, error) { bytes += mb.bytes // Make sure we do subject cleanup as well. mb.ensurePerSubjectInfoLoaded() - for subj, ss := range mb.fss { + mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool { + subj := bytesToString(bsubj) for i := uint64(0); i < ss.Msgs; i++ { fs.removePerSubject(subj) } - } + return true + }) // Now close. mb.dirtyCloseWithRemove(true) mb.mu.Unlock() @@ -7229,13 +7427,17 @@ func (mb *msgBlock) dirtyCloseWithRemove(remove bool) { // Lock should be held. func (mb *msgBlock) removeSeqPerSubject(subj string, seq uint64) { mb.ensurePerSubjectInfoLoaded() - ss := mb.fss[subj] - if ss == nil { + if mb.fss == nil { + return + } + bsubj := stringToBytes(subj) + ss, ok := mb.fss.Find(bsubj) + if !ok || ss == nil { return } if ss.Msgs == 1 { - delete(mb.fss, subj) + mb.fss.Delete(bsubj) return } @@ -7337,7 +7539,7 @@ func (mb *msgBlock) generatePerSubjectInfo() error { } // Create new one regardless. 
- mb.fss = make(map[string]*SimpleState) + mb.fss = stree.NewSubjectTree[SimpleState]() var smv StoreMsg fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq) @@ -7354,16 +7556,16 @@ func (mb *msgBlock) generatePerSubjectInfo() error { return err } if sm != nil && len(sm.subj) > 0 { - if ss := mb.fss[sm.subj]; ss != nil { + if ss, ok := mb.fss.Find(stringToBytes(sm.subj)); ok && ss != nil { ss.Msgs++ ss.Last = seq } else { - mb.fss[sm.subj] = &SimpleState{Msgs: 1, First: seq, Last: seq} + mb.fss.Insert(stringToBytes(sm.subj), SimpleState{Msgs: 1, First: seq, Last: seq}) } } } - if len(mb.fss) > 0 { + if mb.fss.Size() > 0 { // Make sure we run the cache expire timer. mb.llts = time.Now().UnixNano() // Mark fss activity. @@ -7384,7 +7586,7 @@ func (mb *msgBlock) ensurePerSubjectInfoLoaded() error { return nil } if mb.msgs == 0 { - mb.fss = make(map[string]*SimpleState) + mb.fss = stree.NewSubjectTree[SimpleState]() return nil } return mb.generatePerSubjectInfo() @@ -7401,9 +7603,8 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) { } // Now populate psim. - for subj, ss := range mb.fss { - if len(subj) > 0 { - bsubj := stringToBytes(subj) + mb.fss.Iter(func(bsubj []byte, ss *SimpleState) bool { + if len(bsubj) > 0 { if info, ok := fs.psim.Find(bsubj); ok { info.total += ss.Msgs if mb.index > info.lblk { @@ -7411,10 +7612,11 @@ func (fs *fileStore) populateGlobalPerSubjectInfo(mb *msgBlock) { } } else { fs.psim.Insert(bsubj, psi{total: ss.Msgs, fblk: mb.index, lblk: mb.index}) - fs.tsl += len(subj) + fs.tsl += len(bsubj) } } - } + return true + }) } // Close the message block. @@ -7486,10 +7688,23 @@ func (fs *fileStore) Delete() error { os.RemoveAll(pdir) } - // Do Purge() since if we have lots of blocks uses a mv/rename. - fs.Purge() + // Quickly close all blocks and simulate a purge w/o overhead an new write block. + fs.mu.Lock() + for _, mb := range fs.blks { + mb.dirtyClose() + } + dmsgs := fs.state.Msgs + dbytes := int64(fs.state.Bytes) + fs.state.Msgs, fs.state.Bytes = 0, 0 + fs.blks = nil + cb := fs.scb + fs.mu.Unlock() - if err := fs.stop(false); err != nil { + if cb != nil { + cb(-int64(dmsgs), -dbytes, 0, _EMPTY_) + } + + if err := fs.stop(true, false); err != nil { return err } @@ -7505,14 +7720,19 @@ func (fs *fileStore) Delete() error { // Do this in separate Go routine in case lots of blocks. // Purge above protects us as does the removal of meta artifacts above. go func() { + <-dios err := os.RemoveAll(ndir) + dios <- struct{}{} if err == nil { return } ttl := time.Now().Add(time.Second) for time.Now().Before(ttl) { time.Sleep(10 * time.Millisecond) - if err = os.RemoveAll(ndir); err == nil { + <-dios + err = os.RemoveAll(ndir) + dios <- struct{}{} + if err == nil { return } } @@ -7778,11 +7998,11 @@ func (fs *fileStore) _writeFullState(force bool) error { // Stop the current filestore. func (fs *fileStore) Stop() error { - return fs.stop(true) + return fs.stop(false, true) } // Stop the current filestore. 
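// A hedged sketch of the dios discipline the Delete() hunk threads through
// its RemoveAll retry loop: a pre-filled buffered channel acting as a
// counting semaphore that bounds concurrent disk I/O. The capacity of 4 is
// an assumption for illustration, not the server's actual sizing.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

var dios = make(chan struct{}, 4)

func init() {
	for i := 0; i < cap(dios); i++ {
		dios <- struct{}{} // pre-fill with tokens
	}
}

func removeAll(path string) error {
	<-dios // take a disk-I/O token
	err := os.RemoveAll(path)
	dios <- struct{}{} // hand it back
	return err
}

func main() {
	fmt.Println(removeAll(filepath.Join(os.TempDir(), "does-not-exist")))
}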
-func (fs *fileStore) stop(writeState bool) error { +func (fs *fileStore) stop(delete, writeState bool) error { fs.mu.Lock() if fs.closed || fs.closing { fs.mu.Unlock() @@ -7833,7 +8053,11 @@ func (fs *fileStore) stop(writeState bool) error { fs.cmu.Unlock() for _, o := range cfs { - o.Stop() + if delete { + o.StreamDelete() + } else { + o.Stop() + } } if bytes > 0 && cb != nil { @@ -8550,7 +8774,8 @@ func (o *consumerFileStore) UpdateDelivered(dseq, sseq, dc uint64, ts int64) err // Check for an update to a message already delivered. if sseq <= o.state.Delivered.Stream { if p = o.state.Pending[sseq]; p != nil { - p.Sequence, p.Timestamp = dseq, ts + // Do not update p.Sequence, that should be the original delivery sequence. + p.Timestamp = ts } } else { // Add to pending. @@ -8608,7 +8833,14 @@ func (o *consumerFileStore) UpdateAcks(dseq, sseq uint64) error { return nil } + // Match leader logic on checking if ack is ahead of delivered. + // This could happen on a cooperative takeover with high speed deliveries. + if sseq > o.state.Delivered.Stream { + o.state.Delivered.Stream = sseq + 1 + } + if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil { + delete(o.state.Redelivered, sseq) return ErrStoreMsgNotFound } @@ -8639,7 +8871,9 @@ func (o *consumerFileStore) UpdateAcks(dseq, sseq uint64) error { // First delete from our pending state. if p, ok := o.state.Pending[sseq]; ok { delete(o.state.Pending, sseq) - dseq = p.Sequence // Use the original. + if dseq > p.Sequence && p.Sequence > 0 { + dseq = p.Sequence // Use the original. + } } if len(o.state.Pending) == 0 { o.state.AckFloor.Consumer = o.state.Delivered.Consumer diff --git a/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go b/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go index 2b877ad862..4ace6731ce 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/jetstream.go @@ -1440,7 +1440,11 @@ func (a *Account) maxBytesLimits(cfg *StreamConfig) (bool, int64) { return false, 0 } jsa.usageMu.RLock() - selectedLimits, _, ok := jsa.selectLimits(cfg) + var replicas int + if cfg != nil { + replicas = cfg.Replicas + } + selectedLimits, _, ok := jsa.selectLimits(replicas) jsa.usageMu.RUnlock() if !ok { return false, 0 @@ -1590,7 +1594,7 @@ func diffCheckedLimits(a, b map[string]JetStreamAccountLimits) map[string]JetStr func (jsa *jsAccount) reservedStorage(tier string) (mem, store uint64) { for _, mset := range jsa.streams { cfg := &mset.cfg - if tier == _EMPTY_ || tier == tierName(cfg) && cfg.MaxBytes > 0 { + if tier == _EMPTY_ || tier == tierName(cfg.Replicas) && cfg.MaxBytes > 0 { switch cfg.Storage { case FileStorage: store += uint64(cfg.MaxBytes) @@ -1607,7 +1611,7 @@ func (jsa *jsAccount) reservedStorage(tier string) (mem, store uint64) { func reservedStorage(sas map[string]*streamAssignment, tier string) (mem, store uint64) { for _, sa := range sas { cfg := sa.Config - if tier == _EMPTY_ || tier == tierName(cfg) && cfg.MaxBytes > 0 { + if tier == _EMPTY_ || tier == tierName(cfg.Replicas) && cfg.MaxBytes > 0 { switch cfg.Storage { case FileStorage: store += uint64(cfg.MaxBytes) @@ -1695,17 +1699,29 @@ func (a *Account) JetStreamUsage() JetStreamAccountStats { stats.ReservedMemory, stats.ReservedStore = reservedStorage(sas, _EMPTY_) } for _, sa := range sas { - stats.Consumers += len(sa.consumers) - if !defaultTier { - tier := tierName(sa.Config) - u, ok := stats.Tiers[tier] - if !ok { - u = JetStreamTier{} - } - u.Streams++ + if 
defaultTier { + stats.Consumers += len(sa.consumers) + } else { stats.Streams++ - u.Consumers += len(sa.consumers) - stats.Tiers[tier] = u + streamTier := tierName(sa.Config.Replicas) + su, ok := stats.Tiers[streamTier] + if !ok { + su = JetStreamTier{} + } + su.Streams++ + stats.Tiers[streamTier] = su + + // Now consumers, check each since could be different tiers. + for _, ca := range sa.consumers { + stats.Consumers++ + consumerTier := tierName(ca.Config.replicas(sa.Config)) + cu, ok := stats.Tiers[consumerTier] + if !ok { + cu = JetStreamTier{} + } + cu.Consumers++ + stats.Tiers[consumerTier] = cu + } } } } else { @@ -2089,9 +2105,8 @@ func (js *jetStream) limitsExceeded(storeType StorageType) bool { return js.wouldExceedLimits(storeType, 0) } -func tierName(cfg *StreamConfig) string { +func tierName(replicas int) string { // TODO (mh) this is where we could select based off a placement tag as well "qos:tier" - replicas := cfg.Replicas if replicas == 0 { replicas = 1 } @@ -2111,11 +2126,11 @@ func (jsa *jsAccount) jetStreamAndClustered() (*jetStream, bool) { } // jsa.usageMu read lock should be held. -func (jsa *jsAccount) selectLimits(cfg *StreamConfig) (JetStreamAccountLimits, string, bool) { +func (jsa *jsAccount) selectLimits(replicas int) (JetStreamAccountLimits, string, bool) { if selectedLimits, ok := jsa.limits[_EMPTY_]; ok { return selectedLimits, _EMPTY_, true } - tier := tierName(cfg) + tier := tierName(replicas) if selectedLimits, ok := jsa.limits[tier]; ok { return selectedLimits, tier, true } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_api.go b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_api.go index 99dd719fdd..c675bd1d1c 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_api.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_api.go @@ -3267,7 +3267,11 @@ func (s *Server) jsStreamPurgeRequest(sub *subscription, c *client, _ *Account, } func (acc *Account) jsNonClusteredStreamLimitsCheck(cfg *StreamConfig) *ApiError { - selectedLimits, tier, jsa, apiErr := acc.selectLimits(cfg) + var replicas int + if cfg != nil { + replicas = cfg.Replicas + } + selectedLimits, tier, jsa, apiErr := acc.selectLimits(replicas) if apiErr != nil { return apiErr } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go index 505fab1f9d..78b8f9e7d5 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/jetstream_cluster.go @@ -534,12 +534,18 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool { return false } - // If we are catching up return false. - if mset.isCatchingUp() { + // If R1 we are good. + if node == nil { + return true + } + + // Here we are a replicated stream. + // First make sure our monitor routine is running. + if !mset.isMonitorRunning() { return false } - if node == nil || node.Healthy() { + if node.Healthy() { // Check if we are processing a snapshot and are catching up. if !mset.isCatchingUp() { return true @@ -553,7 +559,6 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool { js.restartStream(acc, sa) } } - return false } @@ -863,6 +868,8 @@ func (js *jetStream) setupMetaGroup() error { atomic.StoreInt32(&js.clustered, 1) c.registerWithAccount(sacc) + // Set to true before we start. 
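// A hedged reconstruction of the refactored tier selection above: tierName
// now takes the replica count directly (0 normalized to 1) instead of a
// *StreamConfig, so consumers can resolve their own tier via their effective
// replica count. The "R<n>" naming matches the limits keys used upstream;
// the selectLimits helper below is a simplified stand-in for jsa.selectLimits.
package main

import "fmt"

func tierName(replicas int) string {
	if replicas == 0 {
		replicas = 1
	}
	return fmt.Sprintf("R%d", replicas)
}

func selectLimits(limits map[string]int, replicas int) (int, string, bool) {
	// A global (untiered) limit wins, mirroring jsa.selectLimits.
	if l, ok := limits[""]; ok {
		return l, "", true
	}
	tier := tierName(replicas)
	l, ok := limits[tier]
	return l, tier, ok
}

func main() {
	limits := map[string]int{"R1": 10, "R3": 5}
	fmt.Println(selectLimits(limits, 0)) // 10 R1 true
	fmt.Println(selectLimits(limits, 3)) // 5 R3 true
}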
+ js.metaRecovering = true js.srv.startGoRoutine( js.monitorCluster, pprofLabels{ @@ -2164,7 +2171,7 @@ func genPeerInfo(peers []string, split int) (newPeers, oldPeers []string, newPee // Should only be called from monitorStream. func (mset *stream) waitOnConsumerAssignments() { mset.mu.RLock() - s, js, acc, sa, name := mset.srv, mset.js, mset.acc, mset.sa, mset.cfg.Name + s, js, acc, sa, name, replicas := mset.srv, mset.js, mset.acc, mset.sa, mset.cfg.Name, mset.cfg.Replicas mset.mu.RUnlock() if s == nil || js == nil || acc == nil || sa == nil { @@ -2186,6 +2193,9 @@ func (mset *stream) waitOnConsumerAssignments() { for _, o := range mset.getConsumers() { // Make sure we are registered with our consumer assignment. if ca := o.consumerAssignment(); ca != nil { + if replicas > 1 && !o.isMonitorRunning() { + break + } numReady++ } else { break @@ -2373,7 +2383,8 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps // since we process streams first then consumers as an asset class. mset.waitOnConsumerAssignments() // Setup a periodic check here. - cist = time.NewTicker(30 * time.Second) + // We will fire in 5s the first time then back off to 30s + cist = time.NewTicker(5 * time.Second) cistc = cist.C } @@ -2496,7 +2507,9 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps } case <-cistc: - mset.checkInterestState() + cist.Reset(30 * time.Second) + // We may be adjusting some things with consumers so do this in its own go routine. + go mset.checkInterestState() case <-datc: if mset == nil || isRecovering { @@ -4096,7 +4109,7 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) { sa.consumers = make(map[string]*consumerAssignment) } else if oca := sa.consumers[ca.Name]; oca != nil { wasExisting = true - // Copy over private existing state from former SA. + // Copy over private existing state from former CA. if ca.Group != nil { ca.Group.node = oca.Group.node } @@ -4423,11 +4436,15 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state s.sendInternalMsgLocked(consumerAssignmentSubj, _EMPTY_, nil, b) } } else { + js.mu.RLock() + node := rg.node + js.mu.RUnlock() + if didCreate { o.setCreatedTime(ca.Created) } else { // Check for scale down to 1.. - if rg.node != nil && len(rg.Peers) == 1 { + if node != nil && len(rg.Peers) == 1 { o.clearNode() o.setLeader(true) // Need to clear from rg too. @@ -4442,7 +4459,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state } } - if rg.node == nil { + if node == nil { // Single replica consumer, process manually here. js.mu.Lock() // Force response in case we think this is an update. @@ -4912,7 +4929,22 @@ func (js *jetStream) applyConsumerEntries(o *consumer, ce *CommittedEntry, isLea } } // Check our interest state if applicable. - o.checkStateForInterestStream() + if err := o.checkStateForInterestStream(); err == errAckFloorHigherThanLastSeq { + o.mu.RLock() + mset := o.mset + o.mu.RUnlock() + // Register pre-acks unless no state at all for the stream and we would create alot of pre-acks. + mset.mu.Lock() + var ss StreamState + mset.store.FastState(&ss) + // Only register if we have a valid FirstSeq. 
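// A hedged sketch of the two-phase interest-check ticker in monitorStream
// above: fire quickly once after startup, then relax to the steady interval
// by resetting the ticker from inside its own case. Durations are shrunk so
// the example runs fast.
package main

import (
	"fmt"
	"time"
)

func main() {
	first, steady := 50*time.Millisecond, 200*time.Millisecond
	cist := time.NewTicker(first)
	defer cist.Stop()
	for i := 0; i < 2; i++ {
		start := time.Now()
		<-cist.C
		cist.Reset(steady) // back off after each firing
		fmt.Println("interest check after", time.Since(start).Round(10*time.Millisecond))
	}
}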
+ if ss.FirstSeq > 0 { + for seq := ss.FirstSeq; seq < state.AckFloor.Stream; seq++ { + mset.registerPreAck(o, seq) + } + } + mset.mu.Unlock() + } } } else if e.Type == EntryRemovePeer { @@ -5161,9 +5193,7 @@ func (js *jetStream) processConsumerLeaderChange(o *consumer, isLeader bool) err } else { resp.ConsumerInfo = o.initialInfo() s.sendAPIResponse(client, acc, subject, reply, _EMPTY_, s.jsonResponse(&resp)) - if node := o.raftNode(); node != nil { - o.sendCreateAdvisory() - } + o.sendCreateAdvisory() } return nil @@ -5954,7 +5984,7 @@ func (js *jetStream) createGroupForStream(ci *ClientInfo, cfg *StreamConfig) (*r return nil, errs } -func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, string, *jsAccount, *ApiError) { +func (acc *Account) selectLimits(replicas int) (*JetStreamAccountLimits, string, *jsAccount, *ApiError) { // Grab our jetstream account info. acc.mu.RLock() jsa := acc.js @@ -5965,7 +5995,7 @@ func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, st } jsa.usageMu.RLock() - selectedLimits, tierName, ok := jsa.selectLimits(cfg) + selectedLimits, tierName, ok := jsa.selectLimits(replicas) jsa.usageMu.RUnlock() if !ok { @@ -5976,7 +6006,11 @@ func (acc *Account) selectLimits(cfg *StreamConfig) (*JetStreamAccountLimits, st // Read lock needs to be held func (js *jetStream) jsClusteredStreamLimitsCheck(acc *Account, cfg *StreamConfig) *ApiError { - selectedLimits, tier, _, apiErr := acc.selectLimits(cfg) + var replicas int + if cfg != nil { + replicas = cfg.Replicas + } + selectedLimits, tier, _, apiErr := acc.selectLimits(replicas) if apiErr != nil { return apiErr } @@ -7113,7 +7147,7 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) return } - selectedLimits, _, _, apiErr := acc.selectLimits(&streamCfg) + selectedLimits, _, _, apiErr := acc.selectLimits(cfg.replicas(&streamCfg)) if apiErr != nil { resp.Error = apiErr s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) @@ -7144,25 +7178,45 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec return } + // Was a consumer name provided? + var oname string + if isDurableConsumer(cfg) || cfg.Name != _EMPTY_ { + if cfg.Name != _EMPTY_ { + oname = cfg.Name + } else { + oname = cfg.Durable + } + } + // Check for max consumers here to short circuit if possible. // Start with limit on a stream, but if one is defined at the level of the account // and is lower, use that limit. - maxc := sa.Config.MaxConsumers - if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) { - maxc = selectedLimits.MaxConsumers - } - if maxc > 0 { - // Don't count DIRECTS. - total := 0 - for _, ca := range sa.consumers { - if ca.Config != nil && !ca.Config.Direct { - total++ - } + if action == ActionCreate || action == ActionCreateOrUpdate { + maxc := sa.Config.MaxConsumers + if maxc <= 0 || (selectedLimits.MaxConsumers > 0 && selectedLimits.MaxConsumers < maxc) { + maxc = selectedLimits.MaxConsumers } - if total >= maxc { - resp.Error = NewJSMaximumConsumersLimitError() - s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) - return + if maxc > 0 { + // Don't count DIRECTS. 
+ total := 0 + for cn, ca := range sa.consumers { + if action == ActionCreateOrUpdate { + // If the consumer name is specified and we think it already exists, then + // we're likely updating an existing consumer, so don't count it. Otherwise + // we will incorrectly return NewJSMaximumConsumersLimitError for an update. + if oname != _EMPTY_ && cn == oname && sa.consumers[oname] != nil { + continue + } + } + if ca.Config != nil && !ca.Config.Direct { + total++ + } + } + if total >= maxc { + resp.Error = NewJSMaximumConsumersLimitError() + s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) + return + } } } @@ -7189,16 +7243,10 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec } var ca *consumerAssignment - var oname string // See if we have an existing one already under same durable name or // if name was set by the user. - if isDurableConsumer(cfg) || cfg.Name != _EMPTY_ { - if cfg.Name != _EMPTY_ { - oname = cfg.Name - } else { - oname = cfg.Durable - } + if oname != _EMPTY_ { if ca = sa.consumers[oname]; ca != nil && !ca.deleted { if action == ActionCreate && !reflect.DeepEqual(cfg, ca.Config) { resp.Error = NewJSConsumerAlreadyExistsError() @@ -7615,7 +7663,10 @@ func (mset *stream) stateSnapshot() []byte { func (mset *stream) stateSnapshotLocked() []byte { // Decide if we can support the new style of stream snapshots. if mset.supportsBinarySnapshotLocked() { - snap, _ := mset.store.EncodedStreamState(mset.getCLFS()) + snap, err := mset.store.EncodedStreamState(mset.getCLFS()) + if err != nil { + return nil + } return snap } @@ -7707,7 +7758,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [ if err == nil { err = NewJSAccountResourcesExceededError() } - s.RateLimitWarnf(err.Error()) + s.RateLimitWarnf("JetStream account limits exceeded for '%s': %s", jsa.acc().GetName(), err.Error()) if canRespond { var resp = &JSPubAckResponse{PubAck: &PubAck{Stream: name}} resp.Error = err @@ -8085,8 +8136,11 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) { var sub *subscription var err error - const activityInterval = 30 * time.Second - notActive := time.NewTimer(activityInterval) + const ( + startInterval = 5 * time.Second + activityInterval = 30 * time.Second + ) + notActive := time.NewTimer(startInterval) defer notActive.Stop() defer func() { @@ -8169,7 +8223,7 @@ RETRY: default: } } - notActive.Reset(activityInterval) + notActive.Reset(startInterval) // Grab sync request again on failures. if sreq == nil { @@ -8214,8 +8268,10 @@ RETRY: // Send our sync request. b, _ := json.Marshal(sreq) s.sendInternalMsgLocked(subject, reply, nil, b) + // Remember when we sent this out to avoid loop spins on errors below. reqSendTime := time.Now() + // Clear our sync request. sreq = nil @@ -8764,7 +8820,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) { done = maxOutMsgs-atomic.LoadInt32(&outm) > minBatchWait if !done { // Wait for a small bit. - time.Sleep(50 * time.Millisecond) + time.Sleep(100 * time.Millisecond) } else { // GC friendly. mw.Stop() @@ -8853,7 +8909,9 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) { mset.account(), mset.name(), seq, state) // Try our best to redo our invalidated snapshot as well. 
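// A hedged distillation of the max-consumers rework above: the limit is only
// enforced for create actions, and on create-or-update a consumer that
// already exists under the requested name is excluded from the count so an
// update cannot be rejected by its own existence.
package main

import "fmt"

func underLimit(existing map[string]bool, oname string, maxc int) bool {
	if maxc <= 0 {
		return true // no limit configured
	}
	total := 0
	for cn := range existing {
		if oname != "" && cn == oname {
			continue // updating this consumer; don't count it against itself
		}
		total++
	}
	return total < maxc
}

func main() {
	cons := map[string]bool{"c1": true, "c2": true}
	fmt.Println(underLimit(cons, "c2", 2)) // true: updating c2 still fits
	fmt.Println(underLimit(cons, "c3", 2)) // false: a third consumer exceeds
}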
if n := mset.raftNode(); n != nil { - n.InstallSnapshot(mset.stateSnapshot()) + if snap := mset.stateSnapshot(); snap != nil { + n.InstallSnapshot(snap) + } } // If we allow gap markers check if we have one pending. if drOk && dr.First > 0 { diff --git a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go index 8f3fe627e4..3c20cbdf43 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/leafnode.go @@ -584,6 +584,9 @@ func (s *Server) clearObserverState(remote *leafNodeCfg) { return } + acc.jscmMu.Lock() + defer acc.jscmMu.Unlock() + // Walk all streams looking for any clustered stream, skip otherwise. for _, mset := range acc.streams() { node := mset.raftNode() @@ -619,6 +622,9 @@ func (s *Server) checkJetStreamMigrate(remote *leafNodeCfg) { return } + acc.jscmMu.Lock() + defer acc.jscmMu.Unlock() + // Walk all streams looking for any clustered stream, skip otherwise. // If we are the leader force stepdown. for _, mset := range acc.streams() { diff --git a/vendor/github.com/nats-io/nats-server/v2/server/memstore.go b/vendor/github.com/nats-io/nats-server/v2/server/memstore.go index 19560b04da..8cd9070eb7 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/memstore.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/memstore.go @@ -261,7 +261,7 @@ func (ms *memStore) SkipMsg() uint64 { ms.state.LastSeq = seq ms.state.LastTime = now if ms.state.Msgs == 0 { - ms.state.FirstSeq = seq + ms.state.FirstSeq = seq + 1 ms.state.FirstTime = now } else { ms.dmap.Insert(seq) @@ -389,9 +389,9 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje } } - tsa := [32]string{} - fsa := [32]string{} - fts := tokenizeSubjectIntoSlice(fsa[:0], filter) + _tsa, _fsa := [32]string{}, [32]string{} + tsa, fsa := _tsa[:0], _fsa[:0] + fsa = tokenizeSubjectIntoSlice(fsa[:0], filter) wc := subjectHasWildcard(filter) // 1. See if we match any subs from fss. @@ -405,8 +405,8 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje if !wc { return subj == filter } - tts := tokenizeSubjectIntoSlice(tsa[:0], subj) - return isSubsetMatchTokenized(tts, fts) + tsa = tokenizeSubjectIntoSlice(tsa[:0], subj) + return isSubsetMatchTokenized(tsa, fsa) } update := func(fss *SimpleState) { @@ -426,9 +426,8 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje var havePartial bool // We will track start and end sequences as we go. ms.fss.Match(stringToBytes(filter), func(subj []byte, fss *SimpleState) { - subjs := bytesToString(subj) if fss.firstNeedsUpdate { - ms.recalculateFirstForSubj(subjs, fss.First, fss) + ms.recalculateFirstForSubj(bytesToString(subj), fss.First, fss) } if sseq <= fss.First { update(fss) @@ -465,14 +464,28 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje } if toScan < toExclude { ss.Msgs, ss.First = 0, 0 - for seq := first; seq <= last; seq++ { - if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) { - ss.Msgs++ - if ss.First == 0 { - ss.First = seq + + update := func(sm *StoreMsg) { + ss.Msgs++ + if ss.First == 0 { + ss.First = sm.seq + } + if seen != nil { + seen[sm.subj] = true + } + } + // Check if easier to just scan msgs vs the sequence range. + // This can happen with lots of interior deletes. 
+ if last-first > uint64(len(ms.msgs)) { + for _, sm := range ms.msgs { + if sm.seq >= first && sm.seq <= last && !seen[sm.subj] && isMatch(sm.subj) { + update(sm) } - if seen != nil { - seen[sm.subj] = true + } + } else { + for seq := first; seq <= last; seq++ { + if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) { + update(sm) } } } @@ -482,17 +495,29 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje var adjust uint64 var tss *SimpleState - for seq := ms.state.FirstSeq; seq < first; seq++ { - if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) { - if lastPerSubject { - tss, _ = ms.fss.Find(stringToBytes(sm.subj)) + update := func(sm *StoreMsg) { + if lastPerSubject { + tss, _ = ms.fss.Find(stringToBytes(sm.subj)) + } + // If we are last per subject, make sure to only adjust if all messages are before our first. + if tss == nil || tss.Last < first { + adjust++ + } + if seen != nil { + seen[sm.subj] = true + } + } + // Check if easier to just scan msgs vs the sequence range. + if first-ms.state.FirstSeq > uint64(len(ms.msgs)) { + for _, sm := range ms.msgs { + if sm.seq < first && !seen[sm.subj] && isMatch(sm.subj) { + update(sm) } - // If we are last per subject, make sure to only adjust if all messages are before our first. - if tss == nil || tss.Last < first { - adjust++ - } - if seen != nil { - seen[sm.subj] = true + } + } else { + for seq := ms.state.FirstSeq; seq < first; seq++ { + if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && isMatch(sm.subj) { + update(sm) } } } @@ -507,10 +532,27 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje } ss.Msgs -= adjust if needScanFirst { - for seq := first; seq < last; seq++ { - if sm, ok := ms.msgs[seq]; ok && isMatch(sm.subj) { - ss.First = seq - break + // Check if easier to just scan msgs vs the sequence range. + // Since we will need to scan all of the msgs vs below where we break on the first match, + // we will only do so if a few orders of magnitude lower. + if last-first > 100*uint64(len(ms.msgs)) { + low := ms.state.LastSeq + for _, sm := range ms.msgs { + if sm.seq >= first && sm.seq < last && isMatch(sm.subj) { + if sm.seq < low { + low = sm.seq + } + } + } + if low < ms.state.LastSeq { + ss.First = low + } + } else { + for seq := first; seq < last; seq++ { + if sm, ok := ms.msgs[seq]; ok && isMatch(sm.subj) { + ss.First = seq + break + } } } } @@ -559,9 +601,9 @@ func (ms *memStore) SubjectsTotals(filterSubject string) map[string]uint64 { return nil } - tsa := [32]string{} - fsa := [32]string{} - fts := tokenizeSubjectIntoSlice(fsa[:0], filterSubject) + _tsa, _fsa := [32]string{}, [32]string{} + tsa, fsa := _tsa[:0], _fsa[:0] + fsa = tokenizeSubjectIntoSlice(fsa[:0], filterSubject) isAll := filterSubject == _EMPTY_ || filterSubject == fwcs fst := make(map[string]uint64) @@ -570,7 +612,7 @@ func (ms *memStore) SubjectsTotals(filterSubject string) map[string]uint64 { if isAll { fst[subjs] = ss.Msgs } else { - if tts := tokenizeSubjectIntoSlice(tsa[:0], subjs); isSubsetMatchTokenized(tts, fts) { + if tsa = tokenizeSubjectIntoSlice(tsa[:0], subjs); isSubsetMatchTokenized(tsa, fsa) { fst[subjs] = ss.Msgs } } @@ -1176,7 +1218,11 @@ func (ms *memStore) removeSeqPerSubject(subj string, seq uint64) { // Will recalculate the first sequence for this subject in this block. // Lock should be held. 
func (ms *memStore) recalculateFirstForSubj(subj string, startSeq uint64, ss *SimpleState) { - for tseq := startSeq + 1; tseq <= ss.Last; tseq++ { + tseq := startSeq + 1 + if tseq < ms.state.FirstSeq { + tseq = ms.state.FirstSeq + } + for ; tseq <= ss.Last; tseq++ { if sm := ms.msgs[tseq]; sm != nil && sm.subj == subj { ss.First = tseq ss.firstNeedsUpdate = false @@ -1509,7 +1555,8 @@ func (o *consumerMemStore) UpdateDelivered(dseq, sseq, dc uint64, ts int64) erro // Check for an update to a message already delivered. if sseq <= o.state.Delivered.Stream { if p = o.state.Pending[sseq]; p != nil { - p.Sequence, p.Timestamp = dseq, ts + // Do not update p.Sequence, that should be the original delivery sequence. + p.Timestamp = ts } } else { // Add to pending. @@ -1558,23 +1605,38 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error { if o.cfg.AckPolicy == AckNone { return ErrNoAckPolicy } - if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil { - return ErrStoreMsgNotFound - } // On restarts the old leader may get a replay from the raft logs that are old. if dseq <= o.state.AckFloor.Consumer { return nil } + // Match leader logic on checking if ack is ahead of delivered. + // This could happen on a cooperative takeover with high speed deliveries. + if sseq > o.state.Delivered.Stream { + o.state.Delivered.Stream = sseq + 1 + } + + if len(o.state.Pending) == 0 || o.state.Pending[sseq] == nil { + delete(o.state.Redelivered, sseq) + return ErrStoreMsgNotFound + } + // Check for AckAll here. if o.cfg.AckPolicy == AckAll { sgap := sseq - o.state.AckFloor.Stream o.state.AckFloor.Consumer = dseq o.state.AckFloor.Stream = sseq - for seq := sseq; seq > sseq-sgap; seq-- { - delete(o.state.Pending, seq) - if len(o.state.Redelivered) > 0 { + if sgap > uint64(len(o.state.Pending)) { + for seq := range o.state.Pending { + if seq <= sseq { + delete(o.state.Pending, seq) + delete(o.state.Redelivered, seq) + } + } + } else { + for seq := sseq; seq > sseq-sgap && len(o.state.Pending) > 0; seq-- { + delete(o.state.Pending, seq) delete(o.state.Redelivered, seq) } } @@ -1586,23 +1648,20 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error { // First delete from our pending state. if p, ok := o.state.Pending[sseq]; ok { delete(o.state.Pending, sseq) - dseq = p.Sequence // Use the original. - } - // Now remove from redelivered. - if len(o.state.Redelivered) > 0 { - delete(o.state.Redelivered, sseq) + if dseq > p.Sequence && p.Sequence > 0 { + dseq = p.Sequence // Use the original. + } } if len(o.state.Pending) == 0 { o.state.AckFloor.Consumer = o.state.Delivered.Consumer o.state.AckFloor.Stream = o.state.Delivered.Stream } else if dseq == o.state.AckFloor.Consumer+1 { - first := o.state.AckFloor.Consumer == 0 o.state.AckFloor.Consumer = dseq o.state.AckFloor.Stream = sseq - if !first && o.state.Delivered.Consumer > dseq { - for ss := sseq + 1; ss < o.state.Delivered.Stream; ss++ { + if o.state.Delivered.Consumer > dseq { + for ss := sseq + 1; ss <= o.state.Delivered.Stream; ss++ { if p, ok := o.state.Pending[ss]; ok { if p.Sequence > 0 { o.state.AckFloor.Consumer = p.Sequence - 1 @@ -1613,6 +1672,8 @@ func (o *consumerMemStore) UpdateAcks(dseq, sseq uint64) error { } } } + // We do these regardless. 
+ delete(o.state.Redelivered, sseq) return nil } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/monitor.go b/vendor/github.com/nats-io/nats-server/v2/server/monitor.go index 11cd864573..b72ee09d57 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/monitor.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/monitor.go @@ -1387,6 +1387,8 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) { var srcUrl string if gitCommit == _EMPTY_ { srcUrl = "https://github.com/nats-io/nats-server" + } else if serverVersion != _EMPTY_ { + srcUrl = fmt.Sprintf("https://github.com/nats-io/nats-server/tree/%s", serverVersion) } else { srcUrl = fmt.Sprintf("https://github.com/nats-io/nats-server/tree/%s", gitCommit) } @@ -1421,6 +1423,7 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) { Routes LeafNodes Gateways + Raft Groups Health Probe Help @@ -1436,6 +1439,7 @@ func (s *Server) HandleRoot(w http.ResponseWriter, r *http.Request) { s.basePath(RoutezPath), s.basePath(LeafzPath), s.basePath(GatewayzPath), + s.basePath(RaftzPath), s.basePath(HealthzPath), ) } @@ -3490,6 +3494,23 @@ func (s *Server) healthz(opts *HealthzOptions) *HealthStatus { return health } + // Are we still recovering meta layer? + if js.isMetaRecovering() { + if !details { + health.Status = na + health.Error = "JetStream is still recovering meta layer" + + } else { + health.Errors = []HealthzError{ + { + Type: HealthzErrorJetStream, + Error: "JetStream is still recovering meta layer", + }, + } + } + return health + } + // Range across all accounts, the streams assigned to them, and the consumers. // If they are assigned to this server check their status. ourID := meta.ID() @@ -3724,3 +3745,138 @@ func (s *Server) profilez(opts *ProfilezOptions) *ProfilezStatus { Profile: buffer.Bytes(), } } + +type RaftzGroup struct { + ID string `json:"id"` + State string `json:"state"` + Size int `json:"size"` + QuorumNeeded int `json:"quorum_needed"` + Observer bool `json:"observer,omitempty"` + Paused bool `json:"paused,omitempty"` + Committed uint64 `json:"committed"` + Applied uint64 `json:"applied"` + CatchingUp bool `json:"catching_up,omitempty"` + Leader string `json:"leader,omitempty"` + EverHadLeader bool `json:"ever_had_leader"` + Term uint64 `json:"term"` + Vote string `json:"voted_for,omitempty"` + PTerm uint64 `json:"pterm"` + PIndex uint64 `json:"pindex"` + IPQPropLen int `json:"ipq_proposal_len"` + IPQEntryLen int `json:"ipq_entry_len"` + IPQRespLen int `json:"ipq_resp_len"` + IPQApplyLen int `json:"ipq_apply_len"` + WAL StreamState `json:"wal"` + WALError error `json:"wal_error,omitempty"` + Peers map[string]RaftzGroupPeer `json:"peers"` +} + +type RaftzGroupPeer struct { + Name string `json:"name"` + Known bool `json:"known"` + LastReplicatedIndex uint64 `json:"last_replicated_index,omitempty"` + LastSeen string `json:"last_seen,omitempty"` +} + +func (s *Server) HandleRaftz(w http.ResponseWriter, r *http.Request) { + if s.raftNodes == nil { + w.WriteHeader(404) + w.Write([]byte("No Raft nodes registered")) + return + } + + gfilter := r.URL.Query().Get("group") + afilter := r.URL.Query().Get("acc") + if afilter == "" { + afilter = s.SystemAccount().Name + } + + groups := map[string]RaftNode{} + infos := map[string]map[string]RaftzGroup{} // account -> group ID + + s.rnMu.RLock() + if gfilter != _EMPTY_ { + if rg, ok := s.raftNodes[gfilter]; ok && rg != nil { + if n, ok := rg.(*raft); ok { + if n.accName == afilter { + groups[gfilter] = rg + } + } + } + } else { + for 
name, rg := range s.raftNodes { + if rg == nil { + continue + } + if n, ok := rg.(*raft); ok { + if n.accName != afilter { + continue + } + groups[name] = rg + } + } + } + s.rnMu.RUnlock() + + if len(groups) == 0 { + w.WriteHeader(404) + w.Write([]byte("No Raft nodes found, does the specified account/group exist?")) + return + } + + for name, rg := range groups { + n, ok := rg.(*raft) + if n == nil || !ok { + continue + } + if _, ok := infos[n.accName]; !ok { + infos[n.accName] = map[string]RaftzGroup{} + } + // Only take the lock once, using the public RaftNode functions would + // cause us to take and release the locks over and over again. + n.RLock() + info := RaftzGroup{ + ID: n.id, + State: RaftState(n.state.Load()).String(), + Size: n.csz, + QuorumNeeded: n.qn, + Observer: n.observer, + Paused: n.paused, + Committed: n.commit, + Applied: n.applied, + CatchingUp: n.catchup != nil, + Leader: n.leader, + EverHadLeader: n.pleader, + Term: n.term, + Vote: n.vote, + PTerm: n.pterm, + PIndex: n.pindex, + IPQPropLen: n.prop.len(), + IPQEntryLen: n.entry.len(), + IPQRespLen: n.resp.len(), + IPQApplyLen: n.apply.len(), + WALError: n.werr, + Peers: map[string]RaftzGroupPeer{}, + } + n.wal.FastState(&info.WAL) + for id, p := range n.peers { + if id == n.id { + continue + } + peer := RaftzGroupPeer{ + Name: s.serverNameForNode(id), + Known: p.kp, + LastReplicatedIndex: p.li, + } + if p.ts > 0 { + peer.LastSeen = time.Since(time.Unix(0, p.ts)).String() + } + info.Peers[id] = peer + } + n.RUnlock() + infos[n.accName][name] = info + } + + b, _ := json.MarshalIndent(infos, "", " ") + ResponseHandler(w, r, b) +} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go b/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go index 7ca4908191..33a0010992 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/mqtt.go @@ -974,7 +974,7 @@ func (s *Server) mqttHandleClosedClient(c *client) { // This needs to be done outside of any lock. if doClean { - if err := sess.clear(); err != nil { + if err := sess.clear(true); err != nil { c.Errorf(err.Error()) } } @@ -1449,7 +1449,7 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc // Opportunistically delete the old (legacy) consumer, from v2.10.10 and // before. Ignore any errors that might arise. rmLegacyDurName := mqttRetainedMsgsStreamName + "_" + jsa.id - jsa.deleteConsumer(mqttRetainedMsgsStreamName, rmLegacyDurName) + jsa.deleteConsumer(mqttRetainedMsgsStreamName, rmLegacyDurName, true) // Create a new, uniquely names consumer for retained messages for this // server. The prior one will expire eventually. 
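
For reference, querying the /raftz endpoint implemented above might look like the following minimal sketch. The path and the acc/group query parameters come from HandleRaftz; the monitor port and the account value here are assumptions, not part of the patch.

	// raftz_query.go - hypothetical client for the /raftz monitoring endpoint.
	package main

	import (
		"fmt"
		"io"
		"net/http"
		"net/url"
	)

	func main() {
		// "acc" defaults to the system account when omitted; "group" narrows
		// the output to a single Raft group. Both values here are examples.
		q := url.Values{"acc": {"$SYS"}}
		resp, err := http.Get("http://localhost:8222/raftz?" + q.Encode())
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()
		body, _ := io.ReadAll(resp.Body)
		fmt.Println(string(body)) // account -> group ID -> RaftzGroup, indented JSON
	}
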
@@ -1672,8 +1672,21 @@ func (jsa *mqttJSA) createDurableConsumer(cfg *CreateConsumerRequest) (*JSApiCon return ccr, ccr.ToError() } -func (jsa *mqttJSA) deleteConsumer(streamName, consName string) (*JSApiConsumerDeleteResponse, error) { +func (jsa *mqttJSA) sendMsg(subj string, msg []byte) { + if subj == _EMPTY_ { + return + } + jsa.sendq.push(&mqttJSPubMsg{subj: subj, msg: msg, hdr: -1}) +} + +// if noWait is specified, does not wait for the JS response, returns nil +func (jsa *mqttJSA) deleteConsumer(streamName, consName string, noWait bool) (*JSApiConsumerDeleteResponse, error) { subj := fmt.Sprintf(JSApiConsumerDeleteT, streamName, consName) + if noWait { + jsa.sendMsg(subj, nil) + return nil, nil + } + cdri, err := jsa.newRequest(mqttJSAConsumerDel, subj, 0, nil) if err != nil { return nil, err @@ -1950,9 +1963,13 @@ func (as *mqttAccountSessionManager) processRetainedMsg(_ *subscription, c *clie } // If lastSeq is 0 (nothing to recover, or done doing it) and this is // from our own server, ignore. + as.mu.RLock() if as.rrmLastSeq == 0 && rm.Origin == as.jsa.id { + as.mu.RUnlock() return } + as.mu.RUnlock() + // At this point we either recover from our own server, or process a remote retained message. seq, _, _ := ackReplyInfo(reply) @@ -1960,11 +1977,13 @@ func (as *mqttAccountSessionManager) processRetainedMsg(_ *subscription, c *clie as.handleRetainedMsg(rm.Subject, &mqttRetainedMsgRef{sseq: seq}, rm, false) // If we were recovering (lastSeq > 0), then check if we are done. + as.mu.Lock() if as.rrmLastSeq > 0 && seq >= as.rrmLastSeq { as.rrmLastSeq = 0 close(as.rrmDoneCh) as.rrmDoneCh = nil } + as.mu.Unlock() } func (as *mqttAccountSessionManager) processRetainedMsgDel(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { @@ -3072,7 +3091,7 @@ func (sess *mqttSession) save() error { // // Runs from the client's readLoop. // Lock not held on entry, but session is in the locked map. -func (sess *mqttSession) clear() error { +func (sess *mqttSession) clear(noWait bool) error { var durs []string var pubRelDur string @@ -3100,19 +3119,19 @@ func (sess *mqttSession) clear() error { sess.mu.Unlock() for _, dur := range durs { - if _, err := sess.jsa.deleteConsumer(mqttStreamName, dur); isErrorOtherThan(err, JSConsumerNotFoundErr) { + if _, err := sess.jsa.deleteConsumer(mqttStreamName, dur, noWait); isErrorOtherThan(err, JSConsumerNotFoundErr) { return fmt.Errorf("unable to delete consumer %q for session %q: %v", dur, sess.id, err) } } - if pubRelDur != "" { - _, err := sess.jsa.deleteConsumer(mqttOutStreamName, pubRelDur) + if pubRelDur != _EMPTY_ { + _, err := sess.jsa.deleteConsumer(mqttOutStreamName, pubRelDur, noWait) if isErrorOtherThan(err, JSConsumerNotFoundErr) { return fmt.Errorf("unable to delete consumer %q for session %q: %v", pubRelDur, sess.id, err) } } if seq > 0 { - err := sess.jsa.deleteMsg(mqttSessStreamName, seq, true) + err := sess.jsa.deleteMsg(mqttSessStreamName, seq, !noWait) // Ignore the various errors indicating that the message (or sequence) // is already deleted, can happen in a cluster. 
if isErrorOtherThan(err, JSSequenceNotFoundErrF) { @@ -3378,7 +3397,7 @@ func (sess *mqttSession) untrackPubRel(pi uint16) (jsAckSubject string) { func (sess *mqttSession) deleteConsumer(cc *ConsumerConfig) { sess.mu.Lock() sess.tmaxack -= cc.MaxAckPending - sess.jsa.sendq.push(&mqttJSPubMsg{subj: sess.jsa.prefixDomain(fmt.Sprintf(JSApiConsumerDeleteT, mqttStreamName, cc.Durable))}) + sess.jsa.deleteConsumer(mqttStreamName, cc.Durable, true) sess.mu.Unlock() } @@ -3717,7 +3736,7 @@ CHECK: // This Session lasts as long as the Network Connection. State data // associated with this Session MUST NOT be reused in any subsequent // Session. - if err := es.clear(); err != nil { + if err := es.clear(false); err != nil { asm.removeSession(es, true) return err } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/raft.go b/vendor/github.com/nats-io/nats-server/v2/server/raft.go index e762417754..347d788eb3 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/raft.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/raft.go @@ -38,7 +38,7 @@ import ( type RaftNode interface { Propose(entry []byte) error - ProposeDirect(entries []*Entry) error + ProposeMulti(entries []*Entry) error ForwardProposal(entry []byte) error InstallSnapshot(snap []byte) error SendSnapshot(snap []byte) error @@ -85,6 +85,7 @@ type WAL interface { RemoveMsg(index uint64) (bool, error) Compact(index uint64) (uint64, error) Purge() (uint64, error) + PurgeEx(subject string, seq, keep uint64) (uint64, error) Truncate(seq uint64) error State() StreamState FastState(*StreamState) @@ -155,25 +156,27 @@ type raft struct { llqrt time.Time // Last quorum lost time lsut time.Time // Last scale-up time - term uint64 // The current vote term - pterm uint64 // Previous term from the last snapshot - pindex uint64 // Previous index from the last snapshot - commit uint64 // Sequence number of the most recent commit - applied uint64 // Sequence number of the most recently applied commit - hcbehind bool // Were we falling behind at the last health check? (see: isCurrent) + term uint64 // The current vote term + pterm uint64 // Previous term from the last snapshot + pindex uint64 // Previous index from the last snapshot + commit uint64 // Index of the most recent commit + applied uint64 // Index of the most recently applied commit leader string // The ID of the leader vote string // Our current vote state lxfer bool // Are we doing a leadership transfer? + hcbehind bool // Were we falling behind at the last health check? (see: isCurrent) + s *Server // Reference to top-level server c *client // Internal client for subscriptions js *jetStream // JetStream, if running, to see if we are out of resources - dflag bool // Debug flag - pleader bool // Has the group ever had a leader? - observer bool // The node is observing, i.e. not participating in voting - extSt extensionState // Extension state + dflag bool // Debug flag + pleader bool // Has the group ever had a leader? + observer bool // The node is observing, i.e. 
not participating in voting
+
+	extSt extensionState // Extension state

 	psubj  string // Proposals subject
 	rpsubj string // Remove peers subject
@@ -232,16 +235,18 @@ const (
 	hbIntervalDefault              = 1 * time.Second
 	lostQuorumIntervalDefault      = hbIntervalDefault * 10 // 10 seconds
 	lostQuorumCheckIntervalDefault = hbIntervalDefault * 10 // 10 seconds
+	observerModeIntervalDefault    = 48 * time.Hour
 )

 var (
-	minElectionTimeout = minElectionTimeoutDefault
-	maxElectionTimeout = maxElectionTimeoutDefault
-	minCampaignTimeout = minCampaignTimeoutDefault
-	maxCampaignTimeout = maxCampaignTimeoutDefault
-	hbInterval         = hbIntervalDefault
-	lostQuorumInterval = lostQuorumIntervalDefault
-	lostQuorumCheck    = lostQuorumCheckIntervalDefault
+	minElectionTimeout   = minElectionTimeoutDefault
+	maxElectionTimeout   = maxElectionTimeoutDefault
+	minCampaignTimeout   = minCampaignTimeoutDefault
+	maxCampaignTimeout   = maxCampaignTimeoutDefault
+	hbInterval           = hbIntervalDefault
+	lostQuorumInterval   = lostQuorumIntervalDefault
+	lostQuorumCheck      = lostQuorumCheckIntervalDefault
+	observerModeInterval = observerModeIntervalDefault
 )

 type RaftConfig struct {
@@ -270,6 +275,7 @@ var (
 	errLeaderLen        = fmt.Errorf("raft: leader should be exactly %d bytes", idLen)
 	errTooManyEntries   = errors.New("raft: append entry can contain a max of 64k entries")
 	errBadAppendEntry   = errors.New("raft: append entry corrupt")
+	errNoInternalClient = errors.New("raft: no internal client")
 )

 // This will bootstrap a raftNode by writing its config into the store directory.
@@ -387,7 +393,7 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
 		apply:    newIPQueue[*CommittedEntry](s, qpfx+"committedEntry"),
 		stepdown: newIPQueue[string](s, qpfx+"stepdown"),
 		accName:  accName,
-		leadc:    make(chan bool, 1),
+		leadc:    make(chan bool, 32),
 		observer: cfg.Observer,
 		extSt:    ps.domainExt,
 	}
@@ -414,7 +420,8 @@
 		return nil, fmt.Errorf("could not create snapshots directory - %v", err)
 	}

-	// Can't recover snapshots if memory based.
+	// Can't recover snapshots if memory based since wal will be reset.
+	// We will inherit from the current leader.
 	if _, ok := n.wal.(*memStore); ok {
 		os.Remove(filepath.Join(n.sd, snapshotsDir, "*"))
 	} else {
@@ -692,36 +699,34 @@ func (n *raft) Propose(data []byte) error {
 		n.debug("Proposal ignored, not leader (state: %v)", state)
 		return errNotLeader
 	}
-	n.RLock()
+	n.Lock()
+	defer n.Unlock()
+
 	// Error if we had a previous write error.
 	if werr := n.werr; werr != nil {
-		n.RUnlock()
 		return werr
 	}
-	prop := n.prop
-	n.RUnlock()
-
-	prop.push(newEntry(EntryNormal, data))
+	n.prop.push(newEntry(EntryNormal, data))
 	return nil
 }

-// ProposeDirect will propose entries directly by skipping the Raft state
-// machine and sending them straight to the wire instead.
+// ProposeMulti will propose multiple entries at once.
 // This should only be called on the leader.
-func (n *raft) ProposeDirect(entries []*Entry) error {
+func (n *raft) ProposeMulti(entries []*Entry) error {
 	if state := n.State(); state != Leader {
 		n.debug("Direct proposal ignored, not leader (state: %v)", state)
 		return errNotLeader
 	}
-	n.RLock()
+	n.Lock()
+	defer n.Unlock()
+
 	// Error if we had a previous write error.
if werr := n.werr; werr != nil { - n.RUnlock() return werr } - n.RUnlock() - - n.sendAppendEntry(entries) + for _, e := range entries { + n.prop.push(e) + } return nil } @@ -871,7 +876,7 @@ func (n *raft) PauseApply() error { n.hcommit = n.commit // Also prevent us from trying to become a leader while paused and catching up. n.pobserver, n.observer = n.observer, true - n.resetElect(48 * time.Hour) + n.resetElect(observerModeInterval) return nil } @@ -1012,25 +1017,20 @@ func (n *raft) InstallSnapshot(data []byte) error { } n.Lock() + defer n.Unlock() // If a write error has occurred already then stop here. if werr := n.werr; werr != nil { - n.Unlock() return werr } // Check that a catchup isn't already taking place. If it is then we won't // allow installing snapshots until it is done. if len(n.progress) > 0 { - n.Unlock() return errCatchupsRunning } - var state StreamState - n.wal.FastState(&state) - if n.applied == 0 { - n.Unlock() return errNoSnapAvailable } @@ -1055,6 +1055,12 @@ func (n *raft) InstallSnapshot(data []byte) error { data: data, } + return n.installSnapshot(snap) +} + +// Install the snapshot. +// Lock should be held. +func (n *raft) installSnapshot(snap *snapshot) error { snapDir := filepath.Join(n.sd, snapshotsDir) sn := fmt.Sprintf(snapFileT, snap.lastTerm, snap.lastIndex) sfile := filepath.Join(snapDir, sn) @@ -1064,29 +1070,21 @@ func (n *raft) InstallSnapshot(data []byte) error { dios <- struct{}{} if err != nil { - n.Unlock() // We could set write err here, but if this is a temporary situation, too many open files etc. // we want to retry and snapshots are not fatal. return err } + // Delete our previous snapshot file if it exists. + if n.snapfile != _EMPTY_ && n.snapfile != sfile { + os.Remove(n.snapfile) + } // Remember our latest snapshot file. n.snapfile = sfile if _, err := n.wal.Compact(snap.lastIndex + 1); err != nil { n.setWriteErrLocked(err) - n.Unlock() return err } - n.Unlock() - - psnaps, _ := os.ReadDir(snapDir) - // Remove any old snapshots. - for _, fi := range psnaps { - pn := fi.Name() - if pn != sn { - os.Remove(filepath.Join(snapDir, pn)) - } - } return nil } @@ -1628,6 +1626,13 @@ func (n *raft) shutdown(shouldDelete bool) { // allowing shutdown() to be called again. If that happens then the below // close(n.quit) will panic from trying to close an already-closed channel. if n.state.Swap(int32(Closed)) == int32(Closed) { + // If we get called again with shouldDelete, in case we were called first with Stop() cleanup + if shouldDelete { + if wal := n.wal; wal != nil { + wal.Delete() + } + os.RemoveAll(n.sd) + } n.Unlock() return } @@ -1644,17 +1649,22 @@ func (n *raft) shutdown(shouldDelete bool) { n.unsubscribe(sub) } c.closeConnection(InternalClient) + n.c = nil } + s, g, wal := n.s, n.group, n.wal // Unregistering ipQueues do not prevent them from push/pop // just will remove them from the central monitoring map queues := []interface { unregister() + drain() }{n.reqs, n.votes, n.prop, n.entry, n.resp, n.apply, n.stepdown} for _, q := range queues { + q.drain() q.unregister() } + sd := n.sd n.Unlock() s.unregisterRaftNode(g) @@ -1669,7 +1679,7 @@ func (n *raft) shutdown(shouldDelete bool) { if shouldDelete { // Delete all our peer state and vote state and any snapshots. - os.RemoveAll(n.sd) + os.RemoveAll(sd) n.debug("Deleted") } else { n.debug("Shutdown") @@ -1724,12 +1734,15 @@ func (n *raft) newInbox() string { // Our internal subscribe. // Lock should be held. 
func (n *raft) subscribe(subject string, cb msgHandler) (*subscription, error) { + if n.c == nil { + return nil, errNoInternalClient + } return n.s.systemSubscribe(subject, _EMPTY_, false, n.c, cb) } // Lock should be held. func (n *raft) unsubscribe(sub *subscription) { - if sub != nil { + if n.c != nil && sub != nil { n.c.processUnsub(sub.sid) } } @@ -1888,8 +1901,24 @@ func (n *raft) SetObserver(isObserver bool) { func (n *raft) setObserver(isObserver bool, extSt extensionState) { n.Lock() defer n.Unlock() + + if n.paused { + // Applies are paused so we're already in observer state. + // Resuming the applies will set the state back to whatever + // is in "pobserver", so update that instead. + n.pobserver = isObserver + return + } + + wasObserver := n.observer n.observer = isObserver n.extSt = extSt + + // If we're leaving observer state then reset the election timer or + // we might end up waiting for up to the observerModeInterval. + if wasObserver && !isObserver { + n.resetElect(randCampaignTimeout()) + } } // processAppendEntries is called by the Raft state machine when there are @@ -1939,7 +1968,7 @@ func (n *raft) runAsFollower() { n.resetElectionTimeoutWithLock() n.debug("Not switching to candidate, no resources") } else if n.IsObserver() { - n.resetElectWithLock(48 * time.Hour) + n.resetElectWithLock(observerModeInterval) n.debug("Not switching to candidate, observer only") } else if n.isCatchingUp() { n.debug("Not switching to candidate, catching up") @@ -2304,15 +2333,15 @@ func (n *raft) runAsLeader() { return } - n.RLock() + n.Lock() psubj, rpsubj := n.psubj, n.rpsubj - n.RUnlock() // For forwarded proposals, both normal and remove peer proposals. fsub, err := n.subscribe(psubj, n.handleForwardedProposal) if err != nil { n.warn("Error subscribing to forwarded proposals: %v", err) n.stepdown.push(noLeader) + n.Unlock() return } rpsub, err := n.subscribe(rpsubj, n.handleForwardedRemovePeerProposal) @@ -2320,8 +2349,10 @@ func (n *raft) runAsLeader() { n.warn("Error subscribing to forwarded remove peer proposals: %v", err) n.unsubscribe(fsub) n.stepdown.push(noLeader) + n.Unlock() return } + n.Unlock() // Cleanup our subscription when we leave. defer func() { @@ -2450,8 +2481,10 @@ func (n *raft) lostQuorum() bool { } func (n *raft) lostQuorumLocked() bool { - // Make sure we let any scale up actions settle before deciding. - if !n.lsut.IsZero() && time.Since(n.lsut) < lostQuorumInterval { + // In order to avoid false positives that can happen in heavily loaded systems + // make sure nothing is queued up that we have not processed yet. + // Also make sure we let any scale up actions settle before deciding. + if n.resp.len() != 0 || (!n.lsut.IsZero() && time.Since(n.lsut) < lostQuorumInterval) { return false } @@ -3080,17 +3113,20 @@ func (n *raft) truncateWAL(term, index uint64) { if err := n.wal.Truncate(index); err != nil { // If we get an invalid sequence, reset our wal all together. + // We will not have holes, so this means we do not have this message stored anymore. if err == ErrInvalidSequence { n.debug("Resetting WAL") n.wal.Truncate(0) - index, n.term, n.pterm, n.pindex = 0, 0, 0, 0 + // If our index is non-zero use PurgeEx to set us to the correct next index. + if index > 0 { + n.wal.PurgeEx(fwcs, index+1, 0) + } } else { n.warn("Error truncating WAL: %v", err) n.setWriteErrLocked(err) + return } - return } - // Set after we know we have truncated properly. 
n.term, n.pterm, n.pindex = term, term, index
 }

@@ -3159,15 +3195,17 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
 	// to a follower of that node instead.
 	if n.State() == Candidate {
 		// Ignore old terms, otherwise we might end up stepping down incorrectly.
-		if ae.term >= n.term {
+		// Needs to be ahead of our pterm (last log term), as an isolated node
+		// could have bumped its vote term up considerably past this point.
+		if ae.term >= n.pterm {
 			// If the append entry term is newer than the current term, erase our
 			// vote.
 			if ae.term > n.term {
-				n.term = ae.term
 				n.vote = noVote
-				n.writeTermVote()
 			}
 			n.debug("Received append entry in candidate state from %q, converting to follower", ae.leader)
+			n.term = ae.term
+			n.writeTermVote()
 			n.stepdown.push(ae.leader)
 		}
 	}
@@ -3262,7 +3300,7 @@
 		// If terms mismatched, or we got an error loading, delete that entry and all others past it.
 		// Make sure to cancel any catchups in progress.
 		// Truncate will reset our pterm and pindex. Only do so if we have an entry.
-		n.truncateWAL(ae.pterm, ae.pindex)
+		n.truncateWAL(eae.pterm, eae.pindex)
 	}
 	// Cancel regardless.
 	n.cancelCatchup()
@@ -3309,6 +3347,7 @@
 			return
 		}

+		// Inherit state from appendEntry with the leader's snapshot.
 		n.pindex = ae.pindex
 		n.pterm = ae.pterm
 		n.commit = ae.pindex
@@ -3319,6 +3358,19 @@
 			return
 		}

+		snap := &snapshot{
+			lastTerm:  n.pterm,
+			lastIndex: n.pindex,
+			peerstate: encodePeerState(&peerState{n.peerNames(), n.csz, n.extSt}),
+			data:      ae.entries[0].Data,
+		}
+		// Install the leader's snapshot as our own.
+		if err := n.installSnapshot(snap); err != nil {
+			n.setWriteErrLocked(err)
+			n.Unlock()
+			return
+		}
+
 		// Now send snapshot to upper levels. Only send the snapshot, not the peerstate entry.
 		n.apply.push(newCommittedEntry(n.commit, ae.entries[:1]))
 		n.Unlock()
@@ -3735,7 +3787,8 @@ func readPeerState(sd string) (ps *peerState, err error) {
 }

 const termVoteFile = "tav.idx"
-const termVoteLen = idLen + 8
+const termLen = 8 // uint64
+const termVoteLen = idLen + termLen

 // Writes out our term & vote outside of a specific raft context.
 func writeTermVote(sd string, wtv []byte) error {
@@ -3761,6 +3814,10 @@ func (n *raft) readTermVote() (term uint64, voted string, err error) {
 	if err != nil {
 		return 0, noVote, err
 	}
+	if len(buf) < termLen {
+		// Not enough bytes for the uint64 below, so avoid a panic.
+		return 0, noVote, nil
+	}
 	var le = binary.LittleEndian
 	term = le.Uint64(buf[0:])
 	if len(buf) < termVoteLen {
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/reload.go b/vendor/github.com/nats-io/nats-server/v2/server/reload.go
index edf6d06daf..d15525d5d6 100644
--- a/vendor/github.com/nats-io/nats-server/v2/server/reload.go
+++ b/vendor/github.com/nats-io/nats-server/v2/server/reload.go
@@ -995,9 +995,11 @@ func (s *Server) Reload() error {
 	return s.ReloadOptions(newOpts)
 }

-// ReloadOptions applies any supported options from the provided Option
+// ReloadOptions applies any supported options from the provided Options
 // type. This returns an error if an option which doesn't support
 // hot-swapping was changed.
+// The provided Options type should not be re-used afterwards.
+// Either use Options.Clone() to pass a copy, or make a new one.
func (s *Server) ReloadOptions(newOpts *Options) error {
 	s.reloadMu.Lock()
 	defer s.reloadMu.Unlock()
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/server.go b/vendor/github.com/nats-io/nats-server/v2/server/server.go
index 0b0ec2acf4..cc3130ebe5 100644
--- a/vendor/github.com/nats-io/nats-server/v2/server/server.go
+++ b/vendor/github.com/nats-io/nats-server/v2/server/server.go
@@ -600,6 +600,8 @@ func New(opts *Options) *Server {

 // NewServer will setup a new server struct after parsing the options.
 // Could return an error if options can not be validated.
+// The provided Options type should not be re-used afterwards.
+// Either use Options.Clone() to pass a copy, or make a new one.
 func NewServer(opts *Options) (*Server, error) {
 	setBaselineOptions(opts)

@@ -1095,11 +1097,11 @@ func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error)
 		if reloading && acc.Name != globalAccountName {
 			if ai, ok := s.accounts.Load(acc.Name); ok {
 				a = ai.(*Account)
-				a.mu.Lock()
 				// Before updating the account, check if stream imports have changed.
 				if !a.checkStreamImportsEqual(acc) {
 					awcsti[acc.Name] = struct{}{}
 				}
+				a.mu.Lock()
 				// Collect the sids for the service imports since we are going to
 				// replace with new ones.
 				var sids [][]byte
@@ -2062,7 +2064,6 @@ func (s *Server) fetchAccount(name string) (*Account, error) {
 			return nil, err
 		}
 		acc := s.buildInternalAccount(accClaims)
-		acc.claimJWT = claimJWT
 		// Due to possible race, if registerAccount() returns a non
 		// nil account, it means the same account was already
 		// registered and we should use this one.
@@ -2078,6 +2079,7 @@
 	var needImportSubs bool

 	acc.mu.Lock()
+	acc.claimJWT = claimJWT
 	if len(acc.imports.services) > 0 {
 		if acc.ic == nil {
 			acc.ic = s.createInternalAccountClient()
@@ -2847,6 +2849,7 @@ const (
 	JszPath      = "/jsz"
 	HealthzPath  = "/healthz"
 	IPQueuesPath = "/ipqueuesz"
+	RaftzPath    = "/raftz"
 )

 func (s *Server) basePath(p string) string {
@@ -2961,6 +2964,8 @@ func (s *Server) startMonitoring(secure bool) error {
 	mux.HandleFunc(s.basePath(HealthzPath), s.HandleHealthz)
 	// IPQueuesz
 	mux.HandleFunc(s.basePath(IPQueuesPath), s.HandleIPQueuesz)
+	// Raftz
+	mux.HandleFunc(s.basePath(RaftzPath), s.HandleRaftz)

 	// Do not set a WriteTimeout because it could cause cURL/browser
 	// to return empty response or unable to display page if the
@@ -4093,6 +4098,16 @@ func (s *Server) isLameDuckMode() bool {
 	return s.ldm
 }

+// LameDuckShutdown will perform a lame duck shutdown of NATS, whereby
+// the client listener is closed, existing client connections are
+// kicked, Raft leaderships are transferred, JetStream is shutdown
+// and then finally shut down the NATS Server itself.
+// This function blocks and will not return until the NATS Server
+// has completed the entire shutdown operation.
+func (s *Server) LameDuckShutdown() {
+	s.lameDuckMode()
+}
+
 // This function will close the client listener then close the clients
 // at some interval to avoid a reconnect storm.
 // We will also transfer any raft leaders and shutdown JetStream.
@@ -4222,6 +4237,7 @@ func (s *Server) lameDuckMode() {
 		}
 	}
 	s.Shutdown()
+	s.WaitForShutdown()
 }

 // Send an INFO update to routes with the indication that this server is in LDM mode.
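
The new LameDuckShutdown helper above can be driven from an embedding application; a minimal sketch follows. The Options values and the readiness timeout are assumptions, while NewServer, Start, ReadyForConnections and LameDuckShutdown are the server's own API.

	package main

	import (
		"os"
		"os/signal"
		"syscall"
		"time"

		"github.com/nats-io/nats-server/v2/server"
	)

	func main() {
		opts := &server.Options{Port: 4222} // assumed minimal options; not re-used afterwards
		s, err := server.NewServer(opts)
		if err != nil {
			panic(err)
		}
		go s.Start()
		if !s.ReadyForConnections(10 * time.Second) {
			panic("server did not become ready")
		}
		// On SIGTERM, kick clients, transfer Raft leaderships, shut down
		// JetStream, and block until the server has fully stopped.
		sigc := make(chan os.Signal, 1)
		signal.Notify(sigc, syscall.SIGTERM)
		<-sigc
		s.LameDuckShutdown()
	}
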
@@ -4416,8 +4432,11 @@ func (s *Server) DisconnectClientByID(id uint64) error { if client := s.getClient(id); client != nil { client.closeConnection(Kicked) return nil + } else if client = s.GetLeafNode(id); client != nil { + client.closeConnection(Kicked) + return nil } - return errors.New("no such client id") + return errors.New("no such client or leafnode id") } // LDMClientByID sends a Lame Duck Mode info message to a client by connection ID diff --git a/vendor/github.com/nats-io/nats-server/v2/server/signal.go b/vendor/github.com/nats-io/nats-server/v2/server/signal.go index aa133b4f31..aad65e828f 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/signal.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/signal.go @@ -51,6 +51,7 @@ func (s *Server) handleSignals() { switch sig { case syscall.SIGINT: s.Shutdown() + s.WaitForShutdown() os.Exit(0) case syscall.SIGTERM: // Shutdown unless graceful shutdown already in progress. @@ -60,6 +61,7 @@ func (s *Server) handleSignals() { if !ldm { s.Shutdown() + s.WaitForShutdown() os.Exit(1) } case syscall.SIGUSR1: diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stream.go b/vendor/github.com/nats-io/nats-server/v2/server/stream.go index 6ed1792e35..a09afdbf32 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stream.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stream.go @@ -462,7 +462,7 @@ func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileSt } } jsa.usageMu.RLock() - selected, tier, hasTier := jsa.selectLimits(&cfg) + selected, tier, hasTier := jsa.selectLimits(cfg.Replicas) jsa.usageMu.RUnlock() reserved := int64(0) if !isClustered { @@ -858,7 +858,11 @@ func (mset *stream) setLeader(isLeader bool) error { if mset.sourcesConsumerSetup != nil { mset.sourcesConsumerSetup.Stop() mset.sourcesConsumerSetup = nil + } else { + // Stop any source consumers + mset.stopSourceConsumers() } + // Stop responding to sync requests. mset.stopClusterSubs() // Unsubscribe from direct stream. @@ -1482,19 +1486,38 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfi } // Check for literal duplication of subject interest in config - // and no overlap with any JS API subject space + // and no overlap with any JS or SYS API subject space. dset := make(map[string]struct{}, len(cfg.Subjects)) for _, subj := range cfg.Subjects { + // Make sure the subject is valid. Check this first. + if !IsValidSubject(subj) { + return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject")) + } if _, ok := dset[subj]; ok { return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicate subjects detected")) } - // Also check to make sure we do not overlap with our $JS API subjects. - if subjectIsSubsetMatch(subj, "$JS.API.>") { - return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects overlap with jetstream api")) + // Check for trying to capture everything. + if subj == fwcs { + if !cfg.NoAck { + return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("capturing all subjects requires no-ack to be true")) + } + // Capturing everything also will require R1. + if cfg.Replicas != 1 { + return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("capturing all subjects requires replicas of 1")) + } } - // Make sure the subject is valid. - if !IsValidSubject(subj) { - return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject")) + // Also check to make sure we do not overlap with our $JS API subjects. 
+ if !cfg.NoAck && (subjectIsSubsetMatch(subj, "$JS.>") || subjectIsSubsetMatch(subj, "$JSC.>")) { + // We allow an exception for $JS.EVENT.> since these could have been created in the past. + if !subjectIsSubsetMatch(subj, "$JS.EVENT.>") { + return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects that overlap with jetstream api require no-ack to be true")) + } + } + // And the $SYS subjects. + if !cfg.NoAck && subjectIsSubsetMatch(subj, "$SYS.>") { + if !subjectIsSubsetMatch(subj, "$SYS.ACCOUNT.>") { + return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects that overlap with system api require no-ack to be true")) + } } // Mark for duplicate check. dset[subj] = struct{}{} @@ -1662,9 +1685,9 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*Str jsa.mu.RLock() acc := jsa.account jsa.usageMu.RLock() - selected, tier, hasTier := jsa.selectLimits(&cfg) + selected, tier, hasTier := jsa.selectLimits(cfg.Replicas) if !hasTier && old.Replicas != cfg.Replicas { - selected, tier, hasTier = jsa.selectLimits(old) + selected, tier, hasTier = jsa.selectLimits(old.Replicas) } jsa.usageMu.RUnlock() reserved := int64(0) @@ -1818,7 +1841,7 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool) si.trs[i], err = NewSubjectTransform(s.SubjectTransforms[i].Source, s.SubjectTransforms[i].Destination) if err != nil { mset.mu.Unlock() - mset.srv.Errorf("Unable to get subject transform for source: %v", err) + return fmt.Errorf("unable to get subject transform for source: %v", err) } } } @@ -1899,7 +1922,7 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool) js := mset.js - if targetTier := tierName(cfg); mset.tier != targetTier { + if targetTier := tierName(cfg.Replicas); mset.tier != targetTier { // In cases such as R1->R3, only one update is needed jsa.usageMu.RLock() _, ok := jsa.limits[targetTier] @@ -2187,9 +2210,11 @@ func (mset *stream) processMirrorMsgs(mirror *sourceInfo, ready *sync.WaitGroup) msgs.recycle(&ims) case <-t.C: mset.mu.RLock() + var stalled bool + if mset.mirror != nil { + stalled = time.Since(time.Unix(0, mset.mirror.last.Load())) > sourceHealthCheckInterval + } isLeader := mset.isLeader() - last := time.Unix(0, mset.mirror.last.Load()) - stalled := mset.mirror != nil && time.Since(last) > sourceHealthCheckInterval mset.mu.RUnlock() // No longer leader. if !isLeader { @@ -2406,14 +2431,14 @@ func (mset *stream) skipMsgs(start, end uint64) { return } - // FIXME (dlc) - We should allow proposals of DeleteEange, but would need to make sure all peers support. + // FIXME (dlc) - We should allow proposals of DeleteRange, but would need to make sure all peers support. // With syncRequest was easy to add bool into request. var entries []*Entry for seq := start; seq <= end; seq++ { - entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)}) + entries = append(entries, newEntry(EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0))) // So a single message does not get too big. if len(entries) > 10_000 { - node.ProposeDirect(entries) + node.ProposeMulti(entries) // We need to re-create `entries` because there is a reference // to it in the node's pae map. entries = entries[:0] @@ -2421,7 +2446,7 @@ func (mset *stream) skipMsgs(start, end uint64) { } // Send all at once. 
if len(entries) > 0 {
-		node.ProposeDirect(entries)
+		node.ProposeMulti(entries)
 	}
 }
@@ -5249,9 +5274,8 @@ func (mset *stream) checkInterestState() {
 	var zeroAcks []*consumer
 	var lowAckFloor uint64 = math.MaxUint64

-	consumers := mset.getConsumers()
-	for _, o := range consumers {
+	for _, o := range mset.getConsumers() {
 		o.checkStateForInterestStream()

 		o.mu.Lock()
@@ -5290,39 +5314,45 @@
 		return
 	}

-	// Hold stream write lock in case we need to purge.
-	mset.mu.Lock()
-	defer mset.mu.Unlock()
-
 	// Capture our current state.
+	// ok to do so without lock.
 	var state StreamState
 	mset.store.FastState(&state)

-	if lowAckFloor < math.MaxUint64 && lowAckFloor > state.FirstSeq {
-		// Check if we had any zeroAcks, we will need to check them.
-		for _, o := range zeroAcks {
-			var np uint64
-			o.mu.RLock()
-			if o.isLeader() {
-				np = uint64(o.numPending())
-			} else {
-				np, _ = o.calculateNumPending()
-			}
-			o.mu.RUnlock()
-			// This means we have pending and can not remove anything at this time.
-			if np > 0 {
-				return
-			}
-		}
-		if lowAckFloor <= state.LastSeq {
-			// Purge the stream to lowest ack floor + 1
-			mset.store.PurgeEx(_EMPTY_, lowAckFloor+1, 0)
+	if lowAckFloor <= state.FirstSeq {
+		return
+	}
+
+	// Do not want to hold stream lock if calculating numPending.
+	// Check if we had any zeroAcks, we will need to check them.
+	for _, o := range zeroAcks {
+		var np uint64
+		o.mu.RLock()
+		if o.isLeader() {
+			np = uint64(o.numPending())
 		} else {
-			// Here we have a low ack floor higher then our last seq.
-			// So we will just do normal purge.
-			mset.store.Purge()
+			np, _ = o.calculateNumPending()
+		}
+		o.mu.RUnlock()
+		// This means we have pending and can not remove anything at this time.
+		if np > 0 {
+			return
 		}
 	}
+
+	mset.mu.Lock()
+	defer mset.mu.Unlock()
+
+	// Check which purge we need to perform.
+	if lowAckFloor <= state.LastSeq || state.Msgs == 0 {
+		// Purge the stream to lowest ack floor + 1
+		mset.store.PurgeEx(_EMPTY_, lowAckFloor+1, 0)
+	} else {
+		// Here we have a low ack floor higher than our last seq.
+		// So we will just do normal purge.
+		mset.store.Purge()
+	}
+
 	// Make sure to reset our local lseq.
 	mset.store.FastState(&state)
 	mset.lseq = state.LastSeq
@@ -5840,6 +5870,8 @@ func (a *Account) RestoreStream(ncfg *StreamConfig, r io.Reader) (*stream, error
 	}
 	mset, err := a.addStream(&cfg)
 	if err != nil {
+		// Make sure to clean up after ourselves here.
+		os.RemoveAll(ndir)
 		return nil, err
 	}
 	if !fcfg.Created.IsZero() {
@@ -5975,3 +6007,10 @@ func (mset *stream) clearMonitorRunning() {
 	defer mset.mu.Unlock()
 	mset.inMonitor = false
 }
+
+// Check if our monitor is running.
+func (mset *stream) isMonitorRunning() bool {
+	mset.mu.RLock()
+	defer mset.mu.RUnlock()
+	return mset.inMonitor
+}
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/dump.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/dump.go
index 4a7d76fb58..60f03e4aad 100644
--- a/vendor/github.com/nats-io/nats-server/v2/server/stree/dump.go
+++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/dump.go
@@ -51,6 +51,7 @@ func (t *SubjectTree[T]) dump(w io.Writer, n node, depth int) {
 func (n *leaf[T]) kind() string   { return "LEAF" }
 func (n *node4) kind() string     { return "NODE4" }
 func (n *node16) kind() string    { return "NODE16" }
+func (n *node48) kind() string    { return "NODE48" }
 func (n *node256) kind() string   { return "NODE256" }

 // Calculates the indendation, etc.
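
The purge decision in the checkInterestState hunk above can be summarized by the following sketch; purgeForAckFloor, purgeTo and purgeAll are hypothetical stand-ins for the method itself, store.PurgeEx(_EMPTY_, seq, 0) and store.Purge(), with the consumer pending checks and locking left out.

	package main

	import "fmt"

	// purgeForAckFloor mirrors the branch structure above with the store
	// abstracted away.
	func purgeForAckFloor(lowAckFloor, firstSeq, lastSeq, msgs uint64, purgeTo func(uint64), purgeAll func()) {
		// Nothing to remove if the low ack floor is at or below the first sequence.
		if lowAckFloor <= firstSeq {
			return
		}
		if lowAckFloor <= lastSeq || msgs == 0 {
			// Purge up to and including the low ack floor.
			purgeTo(lowAckFloor + 1)
		} else {
			// Ack floor is past our last sequence, so remove everything.
			purgeAll()
		}
	}

	func main() {
		purgeTo := func(seq uint64) { fmt.Println("purge to", seq) }
		purgeAll := func() { fmt.Println("purge all") }
		purgeForAckFloor(10, 1, 100, 100, purgeTo, purgeAll) // purge to 11
	}
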
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/leaf.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/leaf.go index 839450f2e4..119837ec26 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stree/leaf.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/leaf.go @@ -18,16 +18,17 @@ import ( ) // Leaf node +// Order of struct fields for best memory alignment (as per govet/fieldalignment) type leaf[T any] struct { + value T // This could be the whole subject, but most likely just the suffix portion. // We will only store the suffix here and assume all prior prefix paths have // been checked once we arrive at this leafnode. suffix []byte - value T } func newLeaf[T any](suffix []byte, value T) *leaf[T] { - return &leaf[T]{copyBytes(suffix), value} + return &leaf[T]{value, copyBytes(suffix)} } func (n *leaf[T]) isLeaf() bool { return true } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/node16.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/node16.go index 2d206afda7..c0c12aafd5 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stree/node16.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/node16.go @@ -14,10 +14,11 @@ package stree // Node with 16 children +// Order of struct fields for best memory alignment (as per govet/fieldalignment) type node16 struct { - meta child [16]node - key [16]byte + meta + key [16]byte } func newNode16(prefix []byte) *node16 { @@ -49,7 +50,7 @@ func (n *node16) findChild(c byte) *node { func (n *node16) isFull() bool { return n.size >= 16 } func (n *node16) grow() node { - nn := newNode256(n.prefix) + nn := newNode48(n.prefix) for i := 0; i < 16; i++ { nn.addChild(n.key[i], n.child[i]) } diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/node256.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/node256.go index fdadde0bc0..5d08b1487a 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stree/node256.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/node256.go @@ -14,9 +14,10 @@ package stree // Node with 256 children +// Order of struct fields for best memory alignment (as per govet/fieldalignment) type node256 struct { - meta child [256]node + meta } func newNode256(prefix []byte) *node256 { @@ -50,10 +51,10 @@ func (n *node256) deleteChild(c byte) { // Shrink if needed and return new node, otherwise return nil. 
func (n *node256) shrink() node {
-	if n.size > 16 {
+	if n.size > 48 {
 		return nil
 	}
-	nn := newNode16(nil)
+	nn := newNode48(nil)
 	for c, child := range n.child {
 		if child != nil {
 			nn.addChild(byte(c), n.child[c])
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/node4.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/node4.go
index 2d48962545..6aeb024abf 100644
--- a/vendor/github.com/nats-io/nats-server/v2/server/stree/node4.go
+++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/node4.go
@@ -14,10 +14,11 @@ package stree

 // Node with 4 children
+// Order of struct fields for best memory alignment (as per govet/fieldalignment)
 type node4 struct {
-	meta
 	child [4]node
-	key   [4]byte
+	meta
+	key [4]byte
 }

 func newNode4(prefix []byte) *node4 {
diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/node48.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/node48.go
new file mode 100644
index 0000000000..fe7ef54352
--- /dev/null
+++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/node48.go
@@ -0,0 +1,110 @@
+// Copyright 2023-2024 The NATS Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stree
+
+// Node with 48 children
+// Memory saving vs node256 comes from the fact that the child array is 16 bytes
+// per `node` entry, so node256's 256*16=4096 vs node48's 256+(48*16)=1024
+// Note that `key` is effectively 1-indexed, as 0 means no entry, so offset by 1
+// Order of struct fields for best memory alignment (as per govet/fieldalignment)
+type node48 struct {
+	child [48]node
+	meta
+	key [256]byte
+}
+
+func newNode48(prefix []byte) *node48 {
+	nn := &node48{}
+	nn.setPrefix(prefix)
+	return nn
+}
+
+func (n *node48) addChild(c byte, nn node) {
+	if n.size >= 48 {
+		panic("node48 full!")
+	}
+	n.child[n.size] = nn
+	n.key[c] = byte(n.size + 1) // 1-indexed
+	n.size++
+}
+
+func (n *node48) findChild(c byte) *node {
+	i := n.key[c]
+	if i == 0 {
+		return nil
+	}
+	return &n.child[i-1]
+}
+
+func (n *node48) isFull() bool { return n.size >= 48 }
+
+func (n *node48) grow() node {
+	nn := newNode256(n.prefix)
+	for c := 0; c < 256; c++ {
+		if i := n.key[c]; i > 0 {
+			nn.addChild(byte(c), n.child[i-1])
+		}
+	}
+	return nn
+}
+
+// Deletes a child from the node.
+func (n *node48) deleteChild(c byte) {
+	i := n.key[c]
+	if i == 0 {
+		return
+	}
+	i-- // Adjust for 1-indexing
+	last := byte(n.size - 1)
+	if i < last {
+		n.child[i] = n.child[last]
+		for ic := 0; ic < 256; ic++ {
+			if n.key[ic] == last+1 {
+				n.key[ic] = i + 1
+				break
+			}
+		}
+	}
+	n.child[last] = nil
+	n.key[c] = 0
+	n.size--
+}
+
+// Shrink if needed and return new node, otherwise return nil.
+func (n *node48) shrink() node {
+	if n.size > 16 {
+		return nil
+	}
+	nn := newNode16(nil)
+	for c := 0; c < 256; c++ {
+		if i := n.key[c]; i > 0 {
+			nn.addChild(byte(c), n.child[i-1])
+		}
+	}
+	return nn
+}
+
+// Iterate over all children calling func f.
+func (n *node48) iter(f func(node) bool) { + for _, c := range n.child { + if c != nil && !f(c) { + return + } + } +} + +// Return our children as a slice. +func (n *node48) children() []node { + return n.child[:n.size] +} diff --git a/vendor/github.com/nats-io/nats-server/v2/server/stree/stree.go b/vendor/github.com/nats-io/nats-server/v2/server/stree/stree.go index b6531924de..d0835bf5d1 100644 --- a/vendor/github.com/nats-io/nats-server/v2/server/stree/stree.go +++ b/vendor/github.com/nats-io/nats-server/v2/server/stree/stree.go @@ -51,6 +51,10 @@ func (t *SubjectTree[T]) Empty() *SubjectTree[T] { // Insert a value into the tree. Will return if the value was updated and if so the old value. func (t *SubjectTree[T]) Insert(subject []byte, value T) (*T, bool) { + if t == nil { + return nil, false + } + old, updated := t.insert(&t.root, subject, value, 0) if !updated { t.size++ @@ -60,6 +64,10 @@ func (t *SubjectTree[T]) Insert(subject []byte, value T) (*T, bool) { // Find will find the value and return it or false if it was not found. func (t *SubjectTree[T]) Find(subject []byte) (*T, bool) { + if t == nil { + return nil, false + } + var si int for n := t.root; n != nil; { if n.isLeaf() { @@ -88,6 +96,10 @@ func (t *SubjectTree[T]) Find(subject []byte) (*T, bool) { // Delete will delete the item and return its value, or not found if it did not exist. func (t *SubjectTree[T]) Delete(subject []byte) (*T, bool) { + if t == nil { + return nil, false + } + val, deleted := t.delete(&t.root, subject, 0) if deleted { t.size-- @@ -97,7 +109,7 @@ func (t *SubjectTree[T]) Delete(subject []byte) (*T, bool) { // Match will match against a subject that can have wildcards and invoke the callback func for each matched value. func (t *SubjectTree[T]) Match(filter []byte, cb func(subject []byte, val *T)) { - if len(filter) == 0 || cb == nil { + if t == nil || t.root == nil || len(filter) == 0 || cb == nil { return } // We need to break this up into chunks based on wildcards, either pwc '*' or fwc '>'. @@ -340,6 +352,7 @@ func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subje t.match(cn, nparts, pre, cb) } } + return } // Here we have normal traversal, so find the next child. nn := n.findChild(p) diff --git a/vendor/modules.txt b/vendor/modules.txt index 9830fd5bf3..83ffd5193d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1255,7 +1255,7 @@ github.com/justinas/alice # github.com/kevinburke/ssh_config v1.2.0 ## explicit github.com/kevinburke/ssh_config -# github.com/klauspost/compress v1.17.8 +# github.com/klauspost/compress v1.17.9 ## explicit; go 1.20 github.com/klauspost/compress github.com/klauspost/compress/flate @@ -1363,7 +1363,7 @@ github.com/miekg/dns # github.com/mileusna/useragent v1.3.4 ## explicit; go 1.14 github.com/mileusna/useragent -# github.com/minio/highwayhash v1.0.2 +# github.com/minio/highwayhash v1.0.3 ## explicit; go 1.15 github.com/minio/highwayhash # github.com/minio/md5-simd v1.1.2 @@ -1420,11 +1420,11 @@ github.com/mohae/deepcopy # github.com/mschoch/smat v0.2.0 ## explicit; go 1.13 github.com/mschoch/smat -# github.com/nats-io/jwt/v2 v2.5.7 +# github.com/nats-io/jwt/v2 v2.5.8 ## explicit; go 1.18 github.com/nats-io/jwt/v2 -# github.com/nats-io/nats-server/v2 v2.10.16 -## explicit; go 1.20 +# github.com/nats-io/nats-server/v2 v2.10.18 +## explicit; go 1.21 github.com/nats-io/nats-server/v2/conf github.com/nats-io/nats-server/v2/internal/fastrand github.com/nats-io/nats-server/v2/internal/ldap
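
To close, a self-contained sketch of the 1-indexed lookup-table layout that node48 above relies on, with strings standing in for child nodes; sparse48 and its methods are hypothetical illustrations, not part of the server.

	package main

	import "fmt"

	// sparse48 mirrors the node48 layout: a dense 48-slot child array plus a
	// 256-entry byte index where 0 means "no child" and i means child[i-1].
	type sparse48 struct {
		child [48]string
		key   [256]byte
		size  int
	}

	func (n *sparse48) add(c byte, v string) {
		n.child[n.size] = v
		n.key[c] = byte(n.size + 1) // store 1-indexed so the zero value means empty
		n.size++
	}

	func (n *sparse48) find(c byte) (string, bool) {
		if i := n.key[c]; i > 0 {
			return n.child[i-1], true
		}
		return "", false
	}

	func main() {
		var n sparse48
		n.add('f', "foo")
		n.add('b', "bar")
		fmt.Println(n.find('b')) // bar true
		fmt.Println(n.find('x')) // "" false
	}
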