build(deps): bump github.com/nats-io/nats-server/v2

Bumps [github.com/nats-io/nats-server/v2](https://github.com/nats-io/nats-server) from 2.10.7 to 2.10.9.
- [Release notes](https://github.com/nats-io/nats-server/releases)
- [Changelog](https://github.com/nats-io/nats-server/blob/main/.goreleaser.yml)
- [Commits](https://github.com/nats-io/nats-server/compare/v2.10.7...v2.10.9)

---
updated-dependencies:
- dependency-name: github.com/nats-io/nats-server/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2024-01-11 06:22:30 +00:00
committed by Ralf Haferkamp
parent 9ad9458d3b
commit 8e96f59cc6
29 changed files with 1989 additions and 997 deletions

4
go.mod
View File

@@ -60,7 +60,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0
github.com/mna/pigeon v1.2.1
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
github.com/nats-io/nats-server/v2 v2.10.7
github.com/nats-io/nats-server/v2 v2.10.9
github.com/nats-io/nats.go v1.31.0
github.com/oklog/run v1.1.0
github.com/olekukonko/tablewriter v0.0.5
@@ -277,7 +277,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/nats-io/jwt/v2 v2.5.3 // indirect
github.com/nats-io/nkeys v0.4.6 // indirect
github.com/nats-io/nkeys v0.4.7 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/nxadm/tail v1.4.8 // indirect
github.com/opencontainers/runtime-spec v1.1.0-rc.1 // indirect

8
go.sum
View File

@@ -1742,12 +1742,12 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW
github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8=
github.com/nats-io/jwt/v2 v2.5.3 h1:/9SWvzc6hTfamcgXJ3uYRpgj+QuY2aLNqRiqrKcrpEo=
github.com/nats-io/jwt/v2 v2.5.3/go.mod h1:iysuPemFcc7p4IoYots3IuELSI4EDe9Y0bQMe+I3Bf4=
github.com/nats-io/nats-server/v2 v2.10.7 h1:f5VDy+GMu7JyuFA0Fef+6TfulfCs5nBTgq7MMkFJx5Y=
github.com/nats-io/nats-server/v2 v2.10.7/go.mod h1:V2JHOvPiPdtfDXTuEUsthUnCvSDeFrK4Xn9hRo6du7c=
github.com/nats-io/nats-server/v2 v2.10.9 h1:VEW43Zz+p+9lARtiPM9ctd6ckun+92ZT2T17HWtwiFI=
github.com/nats-io/nats-server/v2 v2.10.9/go.mod h1:oorGiV9j3BOLLO3ejQe+U7pfAGyPo+ppD7rpgNF6KTQ=
github.com/nats-io/nats.go v1.31.0 h1:/WFBHEc/dOKBF6qf1TZhrdEfTmOZ5JzdJ+Y3m6Y/p7E=
github.com/nats-io/nats.go v1.31.0/go.mod h1:di3Bm5MLsoB4Bx61CBTsxuarI36WbhAwOm8QrW39+i8=
github.com/nats-io/nkeys v0.4.6 h1:IzVe95ru2CT6ta874rt9saQRkWfe2nFj1NtvYSLqMzY=
github.com/nats-io/nkeys v0.4.6/go.mod h1:4DxZNzenSVd1cYQoAa8948QY3QDjrHfcfVADymtkpts=
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=

View File

@@ -0,0 +1,27 @@
Copyright (c) 2011 The LevelDB-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,23 @@
// Copyright 2020-2023 The LevelDB-Go, Pebble and NATS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
package fastrand
import _ "unsafe" // required by go:linkname
// Uint32 returns a lock-free uint32 value, linked directly to the Go
// runtime's per-P fast random source (no mutex, no allocation).
//
//go:linkname Uint32 runtime.fastrand
func Uint32() uint32
// Uint32n returns a lock-free uint32 value in the interval [0, n).
//
//go:linkname Uint32n runtime.fastrandn
func Uint32n(n uint32) uint32
// Uint64 returns a lock-free uint64 value, composed from two Uint32 draws
// (high 32 bits from the first call, low 32 bits from the second).
func Uint64() uint64 {
v := uint64(Uint32())
return v<<32 | uint64(Uint32())
}

View File

@@ -18,7 +18,6 @@ import (
"encoding/hex"
"errors"
"fmt"
"hash/maphash"
"io"
"io/fs"
"math"
@@ -34,6 +33,7 @@ import (
"time"
"github.com/nats-io/jwt/v2"
"github.com/nats-io/nats-server/v2/internal/fastrand"
"github.com/nats-io/nkeys"
"github.com/nats-io/nuid"
)
@@ -89,7 +89,6 @@ type Account struct {
srv *Server // server this account is registered with (possibly nil)
lds string // loop detection subject for leaf nodes
siReply []byte // service reply prefix, will form wildcard subscription.
prand *rand.Rand
eventIds *nuid.NUID
eventIdsMu sync.Mutex
defaultPerms *Permissions
@@ -290,9 +289,6 @@ func (a *Account) shallowCopy(na *Account) {
}
}
na.mappings = a.mappings
if len(na.mappings) > 0 && na.prand == nil {
na.prand = rand.New(rand.NewSource(time.Now().UnixNano()))
}
na.hasMapped.Store(len(na.mappings) > 0)
// JetStream
@@ -605,11 +601,6 @@ func (a *Account) AddWeightedMappings(src string, dests ...*MapDest) error {
a.mu.Lock()
defer a.mu.Unlock()
// We use this for selecting between multiple weighted destinations.
if a.prand == nil {
a.prand = rand.New(rand.NewSource(time.Now().UnixNano()))
}
if !IsValidSubject(src) {
return ErrBadSubject
}
@@ -735,6 +726,18 @@ func (a *Account) RemoveMapping(src string) bool {
a.mappings[len(a.mappings)-1] = nil // gc
a.mappings = a.mappings[:len(a.mappings)-1]
a.hasMapped.Store(len(a.mappings) > 0)
// If we have connected leafnodes make sure to update.
if a.nleafs > 0 {
// Need to release because lock ordering is client -> account
a.mu.Unlock()
// Now grab the leaf list lock. We can hold client lock under this one.
a.lmu.RLock()
for _, lc := range a.lleafs {
lc.forceRemoveFromSmap(src)
}
a.lmu.RUnlock()
a.mu.Lock()
}
return true
}
}
@@ -756,7 +759,7 @@ func (a *Account) selectMappedSubject(dest string) (string, bool) {
return dest, false
}
a.mu.RLock()
a.mu.Lock()
// In case we have to tokenize for subset matching.
tsa := [32]string{}
tts := tsa[:0]
@@ -787,7 +790,7 @@ func (a *Account) selectMappedSubject(dest string) (string, bool) {
}
if m == nil {
a.mu.RUnlock()
a.mu.Unlock()
return dest, false
}
@@ -809,7 +812,7 @@ func (a *Account) selectMappedSubject(dest string) (string, bool) {
if len(dests) == 1 && dests[0].weight == 100 {
d = dests[0]
} else {
w := uint8(a.prand.Int31n(100))
w := uint8(fastrand.Uint32n(100))
for _, rm := range dests {
if w < rm.weight {
d = rm
@@ -826,7 +829,7 @@ func (a *Account) selectMappedSubject(dest string) (string, bool) {
}
}
a.mu.RUnlock()
a.mu.Unlock()
return ndest, true
}
@@ -2193,7 +2196,7 @@ func (a *Account) processServiceImportResponse(sub *subscription, c *client, _ *
// Lock should be held.
func (a *Account) createRespWildcard() {
var b = [baseServerLen]byte{'_', 'R', '_', '.'}
rn := a.prand.Uint64()
rn := fastrand.Uint64()
for i, l := replyPrefixLen, rn; i < len(b); i++ {
b[i] = digits[l%base]
l /= base
@@ -2212,12 +2215,7 @@ func isTrackedReply(reply []byte) bool {
func (a *Account) newServiceReply(tracking bool) []byte {
a.mu.Lock()
s := a.srv
if a.prand == nil {
var h maphash.Hash
h.WriteString(nuid.Next())
a.prand = rand.New(rand.NewSource(int64(h.Sum64())))
}
rn := a.prand.Uint64()
rn := fastrand.Uint64()
// Check if we need to create the reply here.
var createdSiReply bool

View File

@@ -1,4 +1,4 @@
// Copyright 2012-2023 The NATS Authors
// Copyright 2012-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -743,13 +743,24 @@ func (s *Server) processClientOrLeafAuthentication(c *client, opts *Options) (au
// Check if we have nkeys or users for client.
hasNkeys := len(s.nkeys) > 0
hasUsers := len(s.users) > 0
if hasNkeys && c.opts.Nkey != _EMPTY_ {
nkey, ok = s.nkeys[c.opts.Nkey]
if !ok || !c.connectionTypeAllowed(nkey.AllowedConnectionTypes) {
s.mu.Unlock()
return false
if hasNkeys {
if (c.kind == CLIENT || c.kind == LEAF) && noAuthUser != _EMPTY_ &&
c.opts.Username == _EMPTY_ && c.opts.Password == _EMPTY_ && c.opts.Token == _EMPTY_ && c.opts.Nkey == _EMPTY_ {
if _, exists := s.nkeys[noAuthUser]; exists {
c.mu.Lock()
c.opts.Nkey = noAuthUser
c.mu.Unlock()
}
}
} else if hasUsers {
if c.opts.Nkey != _EMPTY_ {
nkey, ok = s.nkeys[c.opts.Nkey]
if !ok || !c.connectionTypeAllowed(nkey.AllowedConnectionTypes) {
s.mu.Unlock()
return false
}
}
}
if hasUsers && nkey == nil {
// Check if we are tls verify and are mapping users from the client_certificate.
if tlsMap {
authorized := checkClientTLSCertSubject(c, func(u string, certDN *ldap.DN, _ bool) (string, bool) {
@@ -989,27 +1000,30 @@ func (s *Server) processClientOrLeafAuthentication(c *client, opts *Options) (au
}
if nkey != nil {
if c.opts.Sig == _EMPTY_ {
c.Debugf("Signature missing")
return false
}
sig, err := base64.RawURLEncoding.DecodeString(c.opts.Sig)
if err != nil {
// Allow fallback to normal base64.
sig, err = base64.StdEncoding.DecodeString(c.opts.Sig)
if err != nil {
c.Debugf("Signature not valid base64")
// If we did not match noAuthUser check signature which is required.
if nkey.Nkey != noAuthUser {
if c.opts.Sig == _EMPTY_ {
c.Debugf("Signature missing")
return false
}
sig, err := base64.RawURLEncoding.DecodeString(c.opts.Sig)
if err != nil {
// Allow fallback to normal base64.
sig, err = base64.StdEncoding.DecodeString(c.opts.Sig)
if err != nil {
c.Debugf("Signature not valid base64")
return false
}
}
pub, err := nkeys.FromPublicKey(c.opts.Nkey)
if err != nil {
c.Debugf("User nkey not valid: %v", err)
return false
}
if err := pub.Verify(c.nonce, sig); err != nil {
c.Debugf("Signature not verified")
return false
}
}
pub, err := nkeys.FromPublicKey(c.opts.Nkey)
if err != nil {
c.Debugf("User nkey not valid: %v", err)
return false
}
if err := pub.Verify(c.nonce, sig); err != nil {
c.Debugf("Signature not verified")
return false
}
if err := c.RegisterNkeyUser(nkey); err != nil {
return false
@@ -1308,6 +1322,33 @@ func (s *Server) isLeafNodeAuthorized(c *client) bool {
// with that user (from the leafnode's authorization{} config).
if opts.LeafNode.Username != _EMPTY_ {
return isAuthorized(opts.LeafNode.Username, opts.LeafNode.Password, opts.LeafNode.Account)
} else if opts.LeafNode.Nkey != _EMPTY_ {
if c.opts.Nkey != opts.LeafNode.Nkey {
return false
}
if c.opts.Sig == _EMPTY_ {
c.Debugf("Signature missing")
return false
}
sig, err := base64.RawURLEncoding.DecodeString(c.opts.Sig)
if err != nil {
// Allow fallback to normal base64.
sig, err = base64.StdEncoding.DecodeString(c.opts.Sig)
if err != nil {
c.Debugf("Signature not valid base64")
return false
}
}
pub, err := nkeys.FromPublicKey(c.opts.Nkey)
if err != nil {
c.Debugf("User nkey not valid: %v", err)
return false
}
if err := pub.Verify(c.nonce, sig); err != nil {
c.Debugf("Signature not verified")
return false
}
return s.registerLeafWithAccount(c, opts.LeafNode.Account)
} else if len(opts.LeafNode.Users) > 0 {
if opts.LeafNode.TLSMap {
var user *User
@@ -1425,15 +1466,21 @@ func validateNoAuthUser(o *Options, noAuthUser string) error {
if len(o.TrustedOperators) > 0 {
return fmt.Errorf("no_auth_user not compatible with Trusted Operator")
}
if o.Users == nil {
return fmt.Errorf(`no_auth_user: "%s" present, but users are not defined`, noAuthUser)
if o.Nkeys == nil && o.Users == nil {
return fmt.Errorf(`no_auth_user: "%s" present, but users/nkeys are not defined`, noAuthUser)
}
for _, u := range o.Users {
if u.Username == noAuthUser {
return nil
}
}
for _, u := range o.Nkeys {
if u.Nkey == noAuthUser {
return nil
}
}
return fmt.Errorf(
`no_auth_user: "%s" not present as user in authorization block or account configuration`,
`no_auth_user: "%s" not present as user or nkey in authorization block or account configuration`,
noAuthUser)
}

View File

@@ -52,11 +52,24 @@ var (
}
)
// GetStatusAssertionStr returns the corresponding string representation of the StatusAssertion.
func GetStatusAssertionStr(sa int) string {
return StatusAssertionValToStr[StatusAssertionIntToVal[sa]]
// If the provided status assertion value is not found in the map (StatusAssertionIntToVal),
// the function defaults to "unknown" to avoid defaulting to "good," which is the default iota value
// for the ocsp.StatusAssertion enumeration (https://pkg.go.dev/golang.org/x/crypto/ocsp#pkg-constants).
// This ensures that we don't unintentionally default to "good" when there's no map entry.
v, ok := StatusAssertionIntToVal[sa]
if !ok {
// set unknown as fallback
v = ocsp.Unknown
}
return StatusAssertionValToStr[v]
}
func (sa StatusAssertion) MarshalJSON() ([]byte, error) {
// This ensures that we don't unintentionally default to "good" when there's no map entry.
// (see more details in the GetStatusAssertionStr() comment)
str, ok := StatusAssertionValToStr[sa]
if !ok {
// set unknown as fallback
@@ -66,6 +79,8 @@ func (sa StatusAssertion) MarshalJSON() ([]byte, error) {
}
func (sa *StatusAssertion) UnmarshalJSON(in []byte) error {
// This ensures that we don't unintentionally default to "good" when there's no map entry.
// (see more details in the GetStatusAssertionStr() comment)
v, ok := StatusAssertionStrToVal[strings.ReplaceAll(string(in), "\"", "")]
if !ok {
// set unknown as fallback

View File

@@ -35,6 +35,7 @@ import (
"github.com/klauspost/compress/s2"
"github.com/nats-io/jwt/v2"
"github.com/nats-io/nats-server/v2/internal/fastrand"
)
// Type of client connection.
@@ -442,8 +443,6 @@ type readCache struct {
// to make sure to only send one message and properly scope to queues as needed.
rts []routeTarget
prand *rand.Rand
// These are all temporary totals for an invocation of a read in readloop.
msgs int32
bytes int32
@@ -4505,12 +4504,6 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
goto sendToRoutesOrLeafs
}
// Check to see if we have our own rand yet. Global rand
// has contention with lots of clients, etc.
if c.in.prand == nil {
c.in.prand = rand.New(rand.NewSource(time.Now().UnixNano()))
}
// Process queue subs
for i := 0; i < len(r.qsubs); i++ {
qsubs := r.qsubs[i]
@@ -4558,7 +4551,7 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
sindex := 0
lqs := len(qsubs)
if lqs > 1 {
sindex = c.in.prand.Int() % lqs
sindex = int(fastrand.Uint32()) % lqs
}
// Find a subscription that is able to deliver this message starting at a random index.

View File

@@ -41,7 +41,7 @@ var (
const (
// VERSION is the current version for the server.
VERSION = "2.10.7"
VERSION = "2.10.9"
// PROTO is the currently supported protocol.
// 0 was the original

View File

@@ -1,4 +1,4 @@
// Copyright 2019-2023 The NATS Authors
// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -128,30 +128,37 @@ const (
)
const (
actionUpdateString = "update"
actionCreateString = "create"
actionCreateOrUpdateString = ""
actionUpdateJSONString = `"update"`
actionCreateJSONString = `"create"`
actionCreateOrUpdateJSONString = `""`
)
var (
actionUpdateJSONBytes = []byte(actionUpdateJSONString)
actionCreateJSONBytes = []byte(actionCreateJSONString)
actionCreateOrUpdateJSONBytes = []byte(actionCreateOrUpdateJSONString)
)
func (a ConsumerAction) String() string {
switch a {
case ActionCreateOrUpdate:
return actionCreateOrUpdateString
return actionCreateOrUpdateJSONString
case ActionCreate:
return actionCreateString
return actionCreateJSONString
case ActionUpdate:
return actionUpdateString
return actionUpdateJSONString
}
return actionCreateOrUpdateString
return actionCreateOrUpdateJSONString
}
func (a ConsumerAction) MarshalJSON() ([]byte, error) {
switch a {
case ActionCreate:
return json.Marshal(actionCreateString)
return actionCreateJSONBytes, nil
case ActionUpdate:
return json.Marshal(actionUpdateString)
return actionUpdateJSONBytes, nil
case ActionCreateOrUpdate:
return json.Marshal(actionCreateOrUpdateString)
return actionCreateOrUpdateJSONBytes, nil
default:
return nil, fmt.Errorf("can not marshal %v", a)
}
@@ -159,11 +166,11 @@ func (a ConsumerAction) MarshalJSON() ([]byte, error) {
func (a *ConsumerAction) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString("create"):
case actionCreateJSONString:
*a = ActionCreate
case jsonString("update"):
case actionUpdateJSONString:
*a = ActionUpdate
case jsonString(""):
case actionCreateOrUpdateJSONString:
*a = ActionCreateOrUpdate
default:
return fmt.Errorf("unknown consumer action: %v", string(data))
@@ -249,9 +256,9 @@ const (
func (r ReplayPolicy) String() string {
switch r {
case ReplayInstant:
return "instant"
return replayInstantPolicyJSONString
default:
return "original"
return replayOriginalPolicyJSONString
}
}
@@ -386,12 +393,13 @@ type consumer struct {
// A single subject filter.
type subjectFilter struct {
subject string
nextSeq uint64
currentSeq uint64
pmsg *jsPubMsg
err error
hasWildcard bool
subject string
nextSeq uint64
currentSeq uint64
pmsg *jsPubMsg
err error
hasWildcard bool
tokenizedSubject []string
}
type subjectFilters []*subjectFilter
@@ -699,16 +707,16 @@ func (mset *stream) addConsumer(config *ConsumerConfig) (*consumer, error) {
}
func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname string, ca *consumerAssignment, isRecovering bool, action ConsumerAction) (*consumer, error) {
mset.mu.RLock()
s, jsa, tierName, cfg, acc, closed := mset.srv, mset.jsa, mset.tier, mset.cfg, mset.acc, mset.closed
retention := cfg.Retention
mset.mu.RUnlock()
// Check if this stream has closed.
if closed {
if mset.closed.Load() {
return nil, NewJSStreamInvalidError()
}
mset.mu.RLock()
s, jsa, tierName, cfg, acc := mset.srv, mset.jsa, mset.tier, mset.cfg, mset.acc
retention := cfg.Retention
mset.mu.RUnlock()
// If we do not have the consumer currently assigned to us in cluster mode we will proceed but warn.
// This can happen on startup with restored state where on meta replay we still do not have
// the assignment. Running in single server mode this always returns true.
@@ -936,8 +944,9 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
subjects := gatherSubjectFilters(o.cfg.FilterSubject, o.cfg.FilterSubjects)
for _, filter := range subjects {
sub := &subjectFilter{
subject: filter,
hasWildcard: subjectHasWildcard(filter),
subject: filter,
hasWildcard: subjectHasWildcard(filter),
tokenizedSubject: tokenizeSubjectIntoSlice(nil, filter),
}
o.subjf = append(o.subjf, sub)
}
@@ -1858,8 +1867,9 @@ func (o *consumer) updateConfig(cfg *ConsumerConfig) error {
newSubjf := make(subjectFilters, 0, len(newSubjects))
for _, newFilter := range newSubjects {
fs := &subjectFilter{
subject: newFilter,
hasWildcard: subjectHasWildcard(newFilter),
subject: newFilter,
hasWildcard: subjectHasWildcard(newFilter),
tokenizedSubject: tokenizeSubjectIntoSlice(nil, newFilter),
}
// If given subject was present, we will retain its fields values
// so `getNextMgs` can take advantage of already buffered `pmsgs`.
@@ -3347,7 +3357,7 @@ func (o *consumer) notifyDeliveryExceeded(sseq, dc uint64) {
o.sendAdvisory(o.deliveryExcEventT, j)
}
// Check to see if the candidate subject matches a filter if its present.
// Check if the candidate subject matches a filter if it's present.
// Lock should be held.
func (o *consumer) isFilteredMatch(subj string) bool {
// No filter is automatic match.
@@ -3361,9 +3371,29 @@ func (o *consumer) isFilteredMatch(subj string) bool {
}
// It's quicker to first check for non-wildcard filters, then
// iterate again to check for subset match.
// TODO(dlc) at speed might be better to just do a sublist with L2 and/or possibly L1.
tsa := [32]string{}
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
for _, filter := range o.subjf {
if subjectIsSubsetMatch(subj, filter.subject) {
if isSubsetMatchTokenized(tts, filter.tokenizedSubject) {
return true
}
}
return false
}
// Check if the candidate filter subject is equal to or a subset match
// of one of the filter subjects.
// Lock should be held.
func (o *consumer) isEqualOrSubsetMatch(subj string) bool {
for _, filter := range o.subjf {
if !filter.hasWildcard && subj == filter.subject {
return true
}
}
tsa := [32]string{}
tts := tokenizeSubjectIntoSlice(tsa[:0], subj)
for _, filter := range o.subjf {
if isSubsetMatchTokenized(filter.tokenizedSubject, tts) {
return true
}
}
@@ -3945,8 +3975,10 @@ func (o *consumer) loopAndGatherMsgs(qch chan struct{}) {
}
} else {
if o.subjf != nil {
tsa := [32]string{}
tts := tokenizeSubjectIntoSlice(tsa[:0], pmsg.subj)
for i, filter := range o.subjf {
if subjectIsSubsetMatch(pmsg.subj, filter.subject) {
if isSubsetMatchTokenized(tts, filter.tokenizedSubject) {
o.subjf[i].currentSeq--
o.subjf[i].nextSeq--
break

View File

@@ -1,4 +1,4 @@
// Copyright 2019-2023 The NATS Authors
// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -290,7 +290,7 @@ const (
// Maximum size of a write buffer we may consider for re-use.
maxBufReuse = 2 * 1024 * 1024
// default cache buffer expiration
defaultCacheBufferExpiration = 5 * time.Second
defaultCacheBufferExpiration = 2 * time.Second
// default sync interval
defaultSyncInterval = 2 * time.Minute
// default idle timeout to close FDs.
@@ -795,7 +795,7 @@ var blkPoolSmall sync.Pool // 2MB
// Get a new msg block based on sz estimate.
func getMsgBlockBuf(sz int) (buf []byte) {
var pb interface{}
var pb any
if sz <= defaultSmallBlockSize {
pb = blkPoolSmall.Get()
} else if sz <= defaultMediumBlockSize {
@@ -1481,7 +1481,6 @@ func (fs *fileStore) recoverFullState() (rerr error) {
if _, err := os.Stat(pdir); err == nil {
os.RemoveAll(pdir)
}
// Grab our stream state file and load it in.
fn := filepath.Join(fs.fcfg.StoreDir, msgDir, streamStreamStateFile)
buf, err := os.ReadFile(fn)
@@ -1590,6 +1589,13 @@ func (fs *fileStore) recoverFullState() (rerr error) {
// We could reference the underlying buffer, but we could guess wrong if
// number of blocks is large and subjects is low, since we would reference buf.
subj := string(buf[bi : bi+lsubj])
// We had a bug that could cause memory corruption in the PSIM that could have gotten stored to disk.
// Only would affect subjects, so do quick check.
if !isValidSubject(subj, true) {
os.Remove(fn)
fs.warn("Stream state corrupt subject detected")
return errCorruptState
}
bi += lsubj
psi := &psi{total: readU64(), fblk: uint32(readU64())}
if psi.total > 1 {
@@ -2024,6 +2030,7 @@ func (fs *fileStore) expireMsgsOnRecover() {
}
// If we are here we have to process the interior messages of this blk.
// This will load fss as well.
if err := mb.loadMsgsWithLock(); err != nil {
mb.mu.Unlock()
break
@@ -2033,7 +2040,6 @@ func (fs *fileStore) expireMsgsOnRecover() {
var needNextFirst bool
// Walk messages and remove if expired.
mb.ensurePerSubjectInfoLoaded()
fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq)
for seq := fseq; seq <= lseq; seq++ {
sm, err := mb.cacheLookup(seq, &smv)
@@ -2196,13 +2202,8 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
fseq, isAll, subs := start, filter == _EMPTY_ || filter == fwcs, []string{filter}
if mb.cacheNotLoaded() {
if err := mb.loadMsgsWithLock(); err != nil {
return nil, false, err
}
if err := mb.ensurePerSubjectInfoLoaded(); err != nil {
return nil, false, err
}
if err := mb.ensurePerSubjectInfoLoaded(); err != nil {
return nil, false, err
}
// If we only have 1 subject currently and it matches our filter we can also set isAll.
@@ -2250,6 +2251,15 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
return nil, false, ErrStoreMsgNotFound
}
var didLoad bool
// Need messages loaded from here on out.
if mb.cacheNotLoaded() {
if err := mb.loadMsgsWithLock(); err != nil {
return nil, false, err
}
didLoad = true
}
if sm == nil {
sm = new(StoreMsg)
}
@@ -2281,7 +2291,7 @@ func (mb *msgBlock) firstMatching(filter string, wc bool, start uint64, sm *Stor
mb.llseq = llseq
}
return nil, false, ErrStoreMsgNotFound
return nil, didLoad, ErrStoreMsgNotFound
}
// This will traverse a message block and generate the filtered pending.
@@ -2533,7 +2543,7 @@ func (fs *fileStore) SubjectsState(subject string) map[string]SimpleState {
mb.mu.Lock()
var shouldExpire bool
if mb.fss == nil {
if mb.fssNotLoaded() {
// Make sure we have fss loaded.
mb.loadMsgsWithLock()
shouldExpire = true
@@ -2581,8 +2591,7 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
}
// Track starting for both block for the sseq and staring block that matches any subject.
var seqStart, subjStart int
var seqStart int
// See if we need to figure out starting block per sseq.
if sseq > fs.state.FirstSeq {
// This should not, but can return -1, so make sure we check to avoid panic below.
@@ -2591,8 +2600,6 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
}
}
var tsa, fsa [32]string
fts := tokenizeSubjectIntoSlice(fsa[:0], filter)
isAll := filter == _EMPTY_ || filter == fwcs
wc := subjectHasWildcard(filter)
@@ -2602,13 +2609,16 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
}
// If we are isAll and have no deleted we can do a simpler calculation.
if isAll && (fs.state.LastSeq-fs.state.FirstSeq+1) == fs.state.Msgs {
if !lastPerSubject && isAll && (fs.state.LastSeq-fs.state.FirstSeq+1) == fs.state.Msgs {
if sseq == 0 {
return fs.state.Msgs, validThrough
}
return fs.state.LastSeq - sseq + 1, validThrough
}
var tsa, fsa [32]string
fts := tokenizeSubjectIntoSlice(fsa[:0], filter)
isMatch := func(subj string) bool {
if isAll {
return true
@@ -2620,81 +2630,134 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
return isSubsetMatchTokenized(tts, fts)
}
// Handle last by subject a bit differently.
// We will scan PSIM since we accurately track the last block we have seen the subject in. This
// allows us to only need to load at most one block now.
// For the last block, we need to track the subjects that we know are in that block, and track seen
// while in the block itself, but complexity there worth it.
if lastPerSubject {
// If we want all and our start sequence is equal or less than first return number of subjects.
if isAll && sseq <= fs.state.FirstSeq {
return uint64(len(fs.psim)), validThrough
}
// If we are here we need to scan. We are going to scan the PSIM looking for lblks that are >= seqStart.
// This will build up a list of all subjects from the selected block onward.
lbm := make(map[string]bool)
mb := fs.blks[seqStart]
bi := mb.index
for subj, psi := range fs.psim {
// If the select blk start is greater than entry's last blk skip.
if bi > psi.lblk {
continue
}
if isMatch(subj) {
total++
// We will track the subjects that are an exact match to the last block.
// This is needed for last block processing.
if psi.lblk == bi {
lbm[subj] = true
}
}
}
// Now check if we need to inspect the seqStart block.
// Grab write lock in case we need to load in msgs.
mb.mu.Lock()
var shouldExpire bool
// We need to walk this block to correct accounting from above.
if sseq > mb.first.seq {
// Track the ones we add back in case more than one.
seen := make(map[string]bool)
// We need to discount the total by subjects seen before sseq, but also add them right back in if they are >= sseq for this blk.
// This only should be subjects we know have the last blk in this block.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
var smv StoreMsg
for seq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
sm, _ := mb.cacheLookup(seq, &smv)
if sm == nil || sm.subj == _EMPTY_ || !lbm[sm.subj] {
continue
}
if isMatch(sm.subj) {
// If less than sseq adjust off of total as long as this subject matched the last block.
if seq < sseq {
if !seen[sm.subj] {
total--
seen[sm.subj] = true
}
} else if seen[sm.subj] {
// This is equal or more than sseq, so add back in.
total++
// Make sure to not process anymore.
delete(seen, sm.subj)
}
}
}
}
// If we loaded the block try to force expire.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
return total, validThrough
}
// If we would need to scan more from the beginning, revert back to calculating directly here.
// TODO(dlc) - Redo properly with sublists etc for subject-based filtering.
if lastPerSubject || seqStart >= (len(fs.blks)/2) {
// If we need to track seen for last per subject.
var seen map[string]bool
if lastPerSubject {
seen = make(map[string]bool)
}
if seqStart >= (len(fs.blks) / 2) {
for i := seqStart; i < len(fs.blks); i++ {
var shouldExpire bool
mb := fs.blks[i]
// Hold write lock in case we need to load cache.
mb.mu.Lock()
var t uint64
if isAll && sseq <= atomic.LoadUint64(&mb.first.seq) {
if lastPerSubject {
mb.ensurePerSubjectInfoLoaded()
for subj := range mb.fss {
if !seen[subj] {
total++
seen[subj] = true
}
}
} else {
total += mb.msgs
}
total += mb.msgs
mb.mu.Unlock()
continue
}
// If we are here we need to at least scan the subject fss.
// Make sure we have fss loaded.
mb.ensurePerSubjectInfoLoaded()
if mb.fssNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
var havePartial bool
for subj, ss := range mb.fss {
if !seen[subj] && isMatch(subj) {
if lastPerSubject {
// Can't have a partials with last by subject.
if sseq <= ss.Last {
t++
seen[subj] = true
}
} else {
if ss.firstNeedsUpdate {
mb.recalculateFirstForSubj(subj, ss.First, ss)
}
if sseq <= ss.First {
t += ss.Msgs
} else if sseq <= ss.Last {
// We matched but its a partial.
havePartial = true
break
}
if isMatch(subj) {
if ss.firstNeedsUpdate {
mb.recalculateFirstForSubj(subj, ss.First, ss)
}
if sseq <= ss.First {
t += ss.Msgs
} else if sseq <= ss.Last {
// We matched but its a partial.
havePartial = true
break
}
}
}
// See if we need to scan msgs here.
if havePartial {
// Clear on partial.
t = 0
// If we load the cache for a linear scan we want to expire that cache upon exit.
var shouldExpire bool
// Make sure we have the cache loaded.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
// Clear on partial.
t = 0
var smv StoreMsg
for seq, lseq := sseq, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && (isAll || isMatch(sm.subj)) {
if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && isMatch(sm.subj) {
t++
}
}
// If we loaded this block for this operation go ahead and expire it here.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
}
// If we loaded this block for this operation go ahead and expire it here.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
total += t
@@ -2702,24 +2765,15 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
return total, validThrough
}
// If we are here its better to calculate totals from psim and adjust downward by scanning less blocks.
// If we are here it's better to calculate totals from psim and adjust downward by scanning less blocks.
// TODO(dlc) - Eventually when sublist uses generics, make this sublist driven instead.
start := uint32(math.MaxUint32)
for subj, psi := range fs.psim {
if isMatch(subj) {
if lastPerSubject {
total++
// Keep track of start index for this subject.
// Use last block in this case.
if psi.lblk < start {
start = psi.lblk
}
} else {
total += psi.total
// Keep track of start index for this subject.
if psi.fblk < start {
start = psi.fblk
}
total += psi.total
// Keep track of start index for this subject.
if psi.fblk < start {
start = psi.fblk
}
}
}
@@ -2729,11 +2783,8 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
}
// If we are here we need to calculate partials for the first blocks.
subjStart = int(start)
firstSubjBlk := fs.bim[uint32(subjStart)]
firstSubjBlk := fs.bim[start]
var firstSubjBlkFound bool
var smv StoreMsg
// Adjust in case not found.
if firstSubjBlk == nil {
firstSubjBlkFound = true
@@ -2741,62 +2792,53 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
// Track how many we need to adjust against the total.
var adjust uint64
for i := 0; i <= seqStart; i++ {
mb := fs.blks[i]
// We can skip blks if we know they are below the first one that has any subject matches.
if !firstSubjBlkFound {
if mb == firstSubjBlk {
firstSubjBlkFound = true
} else {
if firstSubjBlkFound = (mb == firstSubjBlk); !firstSubjBlkFound {
continue
}
}
// We need to scan this block.
var shouldExpire bool
mb.mu.Lock()
// Check if we should include all of this block in adjusting. If so work with metadata.
if sseq > atomic.LoadUint64(&mb.last.seq) {
if isAll && !lastPerSubject {
if isAll {
adjust += mb.msgs
} else {
// We need to adjust for all matches in this block.
// We will scan fss state vs messages themselves.
// Make sure we have fss loaded.
mb.ensurePerSubjectInfoLoaded()
// Make sure we have fss loaded. This loads whole block now.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
for subj, ss := range mb.fss {
if isMatch(subj) {
if lastPerSubject {
adjust++
} else {
adjust += ss.Msgs
}
adjust += ss.Msgs
}
}
}
} else {
// This is the last block. We need to scan per message here.
if mb.cacheNotLoaded() {
if err := mb.loadMsgsWithLock(); err != nil {
mb.mu.Unlock()
return 0, 0
}
mb.loadMsgsWithLock()
shouldExpire = true
}
var last = atomic.LoadUint64(&mb.last.seq)
if sseq < last {
last = sseq
}
// We need to walk all messages in this block
var smv StoreMsg
for seq := atomic.LoadUint64(&mb.first.seq); seq < last; seq++ {
sm, _ := mb.cacheLookup(seq, &smv)
if sm == nil {
if sm == nil || sm.subj == _EMPTY_ {
continue
}
// Check if it matches our filter.
if isMatch(sm.subj) && sm.seq < sseq {
if sm.seq < sseq && isMatch(sm.subj) {
adjust++
}
}
@@ -3180,8 +3222,27 @@ func (fs *fileStore) SkipMsg() uint64 {
fs.mu.Lock()
defer fs.mu.Unlock()
// Grab our current last message block.
mb := fs.lmb
if mb == nil || mb.msgs > 0 && mb.blkSize()+emptyRecordLen > fs.fcfg.BlockSize {
if mb != nil && fs.fcfg.Compression != NoCompression {
// We've now reached the end of this message block, if we want
// to compress blocks then now's the time to do it.
go mb.recompressOnDiskIfNeeded()
}
var err error
if mb, err = fs.newMsgBlockForWrite(); err != nil {
return 0
}
}
// Grab time and last seq.
now, seq := time.Now().UTC(), fs.state.LastSeq+1
// Write skip msg.
mb.skipMsg(seq, now)
// Update fs state.
fs.state.LastSeq, fs.state.LastTime = seq, now
if fs.state.Msgs == 0 {
fs.state.FirstSeq, fs.state.FirstTime = seq, now
@@ -3189,11 +3250,84 @@ func (fs *fileStore) SkipMsg() uint64 {
if seq == fs.state.FirstSeq {
fs.state.FirstSeq, fs.state.FirstTime = seq+1, now
}
fs.lmb.skipMsg(seq, now)
// Mark as dirty for stream state.
fs.dirty++
return seq
}
// Skip multiple msgs. We will determine if we can fit into current lmb or we need to create a new block.
// Reserves sequences [seq, seq+num-1] as deleted placeholders without storing message data.
// Returns ErrSequenceMismatch if seq is non-zero and does not follow the current last sequence.
func (fs *fileStore) SkipMsgs(seq uint64, num uint64) error {
	fs.mu.Lock()
	defer fs.mu.Unlock()

	// Check sequence matches our last sequence.
	// A seq of 0 means "continue from wherever we are"; any other mismatch is an error.
	if seq != fs.state.LastSeq+1 {
		if seq > 0 {
			return ErrSequenceMismatch
		}
		seq = fs.state.LastSeq + 1
	}

	// Limit number of dmap entries
	// Bound on delete-map entries a single block may carry before we roll to a new one.
	const maxDeletes = 64 * 1024
	mb := fs.lmb

	numDeletes := int(num)
	if mb != nil {
		numDeletes += mb.dmap.Size()
	}
	// Roll to a new write block if we have none, the dmap would exceed its bound,
	// or an empty record would push the block past the configured block size.
	if mb == nil || numDeletes > maxDeletes && mb.msgs > 0 || mb.msgs > 0 && mb.blkSize()+emptyRecordLen > fs.fcfg.BlockSize {
		if mb != nil && fs.fcfg.Compression != NoCompression {
			// We've now reached the end of this message block, if we want
			// to compress blocks then now's the time to do it.
			go mb.recompressOnDiskIfNeeded()
		}
		var err error
		if mb, err = fs.newMsgBlockForWrite(); err != nil {
			return err
		}
	}

	// Insert into dmap all entries and place last as marker.
	now := time.Now().UTC()
	nowts := now.UnixNano()
	lseq := seq + num - 1

	mb.mu.Lock()
	var needsRecord bool
	// If we are empty update meta directly.
	// An empty block can just advance first/last past the skipped range.
	if mb.msgs == 0 {
		atomic.StoreUint64(&mb.last.seq, lseq)
		mb.last.ts = nowts
		atomic.StoreUint64(&mb.first.seq, lseq+1)
		mb.first.ts = nowts
	} else {
		// Block has live msgs: mark every skipped sequence deleted and
		// write a placeholder record below (outside mb.mu).
		needsRecord = true
		for ; seq <= lseq; seq++ {
			mb.dmap.Insert(seq)
		}
	}
	mb.mu.Unlock()

	// Write out our placeholder.
	// The ebit flag marks the record as an empty/erased entry.
	if needsRecord {
		mb.writeMsgRecord(emptyRecordLen, lseq|ebit, _EMPTY_, nil, nil, nowts, true)
	}

	// Now update FS accounting.
	// Update fs state.
	fs.state.LastSeq, fs.state.LastTime = lseq, now
	if fs.state.Msgs == 0 {
		fs.state.FirstSeq, fs.state.FirstTime = lseq+1, now
	}

	// Mark as dirty for stream state.
	fs.dirty++

	return nil
}
// Lock should be held.
func (fs *fileStore) rebuildFirst() {
if len(fs.blks) == 0 {
@@ -3238,9 +3372,14 @@ func (fs *fileStore) firstSeqForSubj(subj string) (uint64, error) {
continue
}
mb.mu.Lock()
if err := mb.ensurePerSubjectInfoLoaded(); err != nil {
mb.mu.Unlock()
return 0, err
var shouldExpire bool
if mb.fssNotLoaded() {
// Make sure we have fss loaded.
if err := mb.loadMsgsWithLock(); err != nil {
mb.mu.Unlock()
return 0, err
}
shouldExpire = true
}
if ss := mb.fss[subj]; ss != nil {
// Adjust first if it was not where we thought it should be.
@@ -3255,6 +3394,11 @@ func (fs *fileStore) firstSeqForSubj(subj string) (uint64, error) {
mb.mu.Unlock()
return ss.First, nil
}
// If we did not find it and we loaded this msgBlock try to expire as long as not the last.
if shouldExpire {
// Expire this cache before moving on.
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
}
return 0, nil
@@ -3361,9 +3505,6 @@ func (fs *fileStore) enforceMsgPerSubjectLimit(fireCallback bool) {
}
// Grab the ss entry for this subject in case sparse.
mb.mu.Lock()
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
}
mb.ensurePerSubjectInfoLoaded()
ss := mb.fss[subj]
if ss != nil && ss.firstNeedsUpdate {
@@ -4154,7 +4295,7 @@ func (fs *fileStore) selectNextFirst() {
// Lock should be held.
func (mb *msgBlock) resetCacheExpireTimer(td time.Duration) {
if td == 0 {
td = mb.cexp
td = mb.cexp + 100*time.Millisecond
}
if mb.ctmr == nil {
mb.ctmr = time.AfterFunc(td, mb.expireCache)
@@ -4249,7 +4390,7 @@ func (mb *msgBlock) tryExpireWriteCache() []byte {
// Lock should be held.
func (mb *msgBlock) expireCacheLocked() {
if mb.cache == nil && mb.fss == nil {
if mb.cache == nil {
if mb.ctmr != nil {
mb.ctmr.Stop()
mb.ctmr = nil
@@ -4289,9 +4430,8 @@ func (mb *msgBlock) expireCacheLocked() {
mb.cache.wp = 0
}
// Check if we can clear out our fss and idx unless under force expire.
// We used to hold onto the idx longer but removes need buf now so no point.
mb.fss = nil
// Check if we can clear out our idx unless under force expire.
// fss we keep longer and expire under sync timer checks.
mb.clearCache()
}
@@ -4680,9 +4820,7 @@ func (mb *msgBlock) recompressOnDiskIfNeeded() error {
// Wait for disk I/O slots to become available. This prevents us from
// running away with system resources.
<-dios
defer func() {
dios <- struct{}{}
}()
defer func() { dios <- struct{}{} }()
alg := mb.fs.fcfg.Compression
mb.mu.Lock()
@@ -4854,6 +4992,7 @@ func (fs *fileStore) syncBlocks() {
}
blks := append([]*msgBlock(nil), fs.blks...)
lmb := fs.lmb
syncInterval := fs.fcfg.SyncInterval
fs.mu.RUnlock()
var markDirty bool
@@ -4868,6 +5007,12 @@ func (fs *fileStore) syncBlocks() {
if mb.mfd != nil && mb.sinceLastWriteActivity() > closeFDsIdle {
mb.dirtyCloseWithRemove(false)
}
// Check our fss subject metadata.
// If we have no activity within sync interval remove.
if mb.fssLoaded() && mb.sinceLastActivity() > syncInterval {
mb.fss = nil
}
// Check if we should compact here as well.
// Do not compact last mb.
var needsCompact bool
@@ -5013,24 +5158,30 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
mbFirstSeq := atomic.LoadUint64(&mb.first.seq)
// Capture beginning size of dmap.
dms := uint64(mb.dmap.Size())
idxSz := atomic.LoadUint64(&mb.last.seq) - mbFirstSeq + 1
if mb.cache == nil {
// Approximation, may adjust below.
fseq = mbFirstSeq
idx = make([]uint32, 0, mb.msgs)
idx = make([]uint32, 0, idxSz)
mb.cache = &cache{}
} else {
fseq = mb.cache.fseq
idx = mb.cache.idx
if len(idx) == 0 {
idx = make([]uint32, 0, mb.msgs)
idx = make([]uint32, 0, idxSz)
}
index = uint32(len(mb.cache.buf))
buf = append(mb.cache.buf, buf...)
}
// Create FSS if we should track.
if !mb.noTrack {
var popFss bool
if mb.fssNotLoaded() {
mb.fss = make(map[string]*SimpleState)
popFss = true
}
lbuf := uint32(len(buf))
@@ -5070,7 +5221,9 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
// If we have a hole fill it.
for dseq := mbFirstSeq + uint64(len(idx)); dseq < seq; dseq++ {
idx = append(idx, dbit)
mb.dmap.Insert(dseq)
if dms == 0 {
mb.dmap.Insert(dseq)
}
}
}
// Add to our index.
@@ -5082,12 +5235,12 @@ func (mb *msgBlock) indexCacheBuf(buf []byte) error {
}
// Make sure our dmap has this entry if it was erased.
if erased {
if erased && dms == 0 {
mb.dmap.Insert(seq)
}
// Handle FSS inline here.
if slen > 0 && !mb.noTrack && !erased && !mb.dmap.Exists(seq) {
if popFss && slen > 0 && !mb.noTrack && !erased && !mb.dmap.Exists(seq) {
bsubj := buf[index+msgHdrSize : index+msgHdrSize+uint32(slen)]
if ss := mb.fss[string(bsubj)]; ss != nil {
ss.Msgs++
@@ -5277,17 +5430,41 @@ func (mb *msgBlock) cacheNotLoaded() bool {
return !mb.cacheAlreadyLoaded()
}
// Report if our fss is not loaded.
// Lock should be held.
func (mb *msgBlock) fssNotLoaded() bool {
	if mb.noTrack {
		// Not tracking per-subject state, so there is never anything to load.
		return false
	}
	return mb.fss == nil
}
// Report if we have our fss loaded.
// Lock should be held.
func (mb *msgBlock) fssLoaded() bool {
	loaded := mb.fss != nil
	return loaded
}
// Used to load in the block contents.
// Lock should be held and all conditionals satisfied prior.
func (mb *msgBlock) loadBlock(buf []byte) ([]byte, error) {
f, err := os.Open(mb.mfn)
if err != nil {
if os.IsNotExist(err) {
err = errNoBlkData
var f *os.File
// Re-use if we have mfd open.
if mb.mfd != nil {
f = mb.mfd
if n, err := f.Seek(0, 0); n != 0 || err != nil {
f = nil
mb.closeFDsLockedNoCheck()
}
return nil, err
}
defer f.Close()
if f == nil {
var err error
f, err = os.Open(mb.mfn)
if err != nil {
if os.IsNotExist(err) {
err = errNoBlkData
}
return nil, err
}
defer f.Close()
}
var sz int
if info, err := f.Stat(); err == nil {
@@ -5430,16 +5607,34 @@ func (mb *msgBlock) fetchMsg(seq uint64, sm *StoreMsg) (*StoreMsg, bool, error)
mb.mu.Lock()
defer mb.mu.Unlock()
fseq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq)
if seq < fseq || seq > lseq {
return nil, false, ErrStoreMsgNotFound
}
// See if we can short circuit if we already know msg deleted.
if mb.dmap.Exists(seq) {
// Update for scanning like cacheLookup would have.
llseq := mb.llseq
if mb.llseq == 0 || seq < mb.llseq || seq == mb.llseq+1 || seq == mb.llseq-1 {
mb.llseq = seq
}
expireOk := (seq == lseq && llseq == seq-1) || (seq == fseq && llseq == seq+1)
return nil, expireOk, errDeletedMsg
}
if mb.cacheNotLoaded() {
if err := mb.loadMsgsWithLock(); err != nil {
return nil, false, err
}
}
llseq := mb.llseq
fsm, err := mb.cacheLookup(seq, sm)
if err != nil {
return nil, false, err
}
expireOk := seq == atomic.LoadUint64(&mb.last.seq) && mb.llseq == seq
expireOk := (seq == lseq && llseq == seq-1) || (seq == fseq && llseq == seq+1)
return fsm, expireOk, err
}
@@ -5481,6 +5676,13 @@ func (mb *msgBlock) cacheLookup(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
return nil, ErrStoreMsgNotFound
}
// The llseq signals us when we can expire a cache at the end of a linear scan.
// We want to only update when we know the last reads (multiple consumers) are sequential.
// We want to account for forwards and backwards linear scans.
if mb.llseq == 0 || seq < mb.llseq || seq == mb.llseq+1 || seq == mb.llseq-1 {
mb.llseq = seq
}
// If we have a delete map check it.
if mb.dmap.Exists(seq) {
mb.llts = time.Now().UnixNano()
@@ -5503,11 +5705,6 @@ func (mb *msgBlock) cacheLookup(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
// Update cache activity.
mb.llts = time.Now().UnixNano()
// The llseq signals us when we can expire a cache at the end of a linear scan.
// We want to only update when we know the last reads (multiple consumers) are sequential.
if mb.llseq == 0 || seq < mb.llseq || seq == mb.llseq+1 {
mb.llseq = seq
}
li := int(bi) - mb.cache.off
if li >= len(mb.cache.buf) {
@@ -5576,7 +5773,7 @@ func (fs *fileStore) msgForSeq(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
seq = fs.state.FirstSeq
}
// Make sure to snapshot here.
mb, lmb, lseq := fs.selectMsgBlock(seq), fs.lmb, fs.state.LastSeq
mb, lseq := fs.selectMsgBlock(seq), fs.state.LastSeq
fs.mu.RUnlock()
if mb == nil {
@@ -5594,7 +5791,7 @@ func (fs *fileStore) msgForSeq(seq uint64, sm *StoreMsg) (*StoreMsg, error) {
// We detected a linear scan and access to the last message.
// If we are not the last message block we can try to expire the cache.
if mb != lmb && expireOk {
if expireOk {
mb.tryForceExpireCache()
}
@@ -5767,12 +5964,14 @@ func (fs *fileStore) LoadNextMsg(filter string, wc bool, start uint64, sm *Store
for i := bi; i < len(fs.blks); i++ {
mb := fs.blks[i]
if sm, expireOk, err := mb.firstMatching(filter, wc, start, sm); err == nil {
if expireOk && mb != fs.lmb {
if expireOk {
mb.tryForceExpireCache()
}
return sm, sm.seq, nil
} else if err != ErrStoreMsgNotFound {
return nil, 0, err
} else if expireOk {
mb.tryForceExpireCache()
}
}
}
@@ -5885,6 +6084,21 @@ func fileStoreMsgSizeEstimate(slen, maxPayload int) uint64 {
return uint64(emptyRecordLen + slen + 4 + maxPayload)
}
// Determine time since any last activity, read/load, write or remove.
func (mb *msgBlock) sinceLastActivity() time.Duration {
	if mb.closed {
		return 0
	}
	// Pick the most recent of the write (lwts), read (lrts) and
	// load/lookup (llts) timestamps.
	latest := mb.lwts
	for _, ts := range []int64{mb.lrts, mb.llts} {
		if ts > latest {
			latest = ts
		}
	}
	return time.Since(time.Unix(0, latest).UTC())
}
// Determine time since last write or remove of a message.
// Read lock should be held.
func (mb *msgBlock) sinceLastWriteActivity() time.Duration {
@@ -6098,24 +6312,29 @@ func (fs *fileStore) PurgeEx(subject string, sequence, keep uint64) (purged uint
for i := 0; i < len(fs.blks); i++ {
mb := fs.blks[i]
mb.mu.Lock()
if err := mb.ensurePerSubjectInfoLoaded(); err != nil {
mb.mu.Unlock()
continue
}
// If we do not have our fss, try to expire the cache if we have no items in this block.
shouldExpire := mb.fssNotLoaded()
t, f, l := mb.filteredPendingLocked(subject, wc, atomic.LoadUint64(&mb.first.seq))
if t == 0 {
// Expire if we were responsible for loading.
if shouldExpire {
// Expire this cache before moving on.
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
continue
}
var shouldExpire bool
if sequence > 1 && sequence <= l {
l = sequence - 1
}
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
if sequence > 1 && sequence <= l {
l = sequence - 1
}
for seq := f; seq <= l; seq++ {
if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && eq(sm.subj, subject) {
@@ -6908,6 +7127,7 @@ func (mb *msgBlock) close(sync bool) {
mb.ctmr = nil
}
// Clear fss.
mb.fss = nil
// Close cache
@@ -8835,5 +9055,4 @@ func (alg StoreCompression) Decompress(buf []byte) ([]byte, error) {
output = append(output, checksum...)
return output, reader.Close()
}

View File

@@ -71,11 +71,13 @@ type JetStreamAccountLimits struct {
}
type JetStreamTier struct {
Memory uint64 `json:"memory"`
Store uint64 `json:"storage"`
Streams int `json:"streams"`
Consumers int `json:"consumers"`
Limits JetStreamAccountLimits `json:"limits"`
Memory uint64 `json:"memory"`
Store uint64 `json:"storage"`
ReservedMemory uint64 `json:"reserved_memory"`
ReservedStore uint64 `json:"reserved_storage"`
Streams int `json:"streams"`
Consumers int `json:"consumers"`
Limits JetStreamAccountLimits `json:"limits"`
}
// JetStreamAccountStats returns current statistics about the account's JetStream usage.
@@ -1580,6 +1582,40 @@ func diffCheckedLimits(a, b map[string]JetStreamAccountLimits) map[string]JetStr
return diff
}
// Return reserved bytes for memory and store for this account on this server.
// Lock should be held.
func (jsa *jsAccount) reservedStorage(tier string) (mem, store uint64) {
	for _, mset := range jsa.streams {
		cfg := &mset.cfg
		// Only count streams that actually reserve space (MaxBytes > 0).
		// NOTE: && binds tighter than ||, so the previous unparenthesized
		// condition counted streams with MaxBytes <= 0 (e.g. -1 for
		// unlimited) whenever tier was empty, which overflows the uint64
		// conversion below into a huge reservation.
		if (tier == _EMPTY_ || tier == tierName(cfg)) && cfg.MaxBytes > 0 {
			switch cfg.Storage {
			case FileStorage:
				store += uint64(cfg.MaxBytes)
			case MemoryStorage:
				mem += uint64(cfg.MaxBytes)
			}
		}
	}
	return mem, store
}
// Return reserved bytes for memory and store for this account in clustered mode.
// js lock should be held.
func reservedStorage(sas map[string]*streamAssignment, tier string) (mem, store uint64) {
	for _, sa := range sas {
		cfg := sa.Config
		// Only count streams that actually reserve space (MaxBytes > 0).
		// NOTE: && binds tighter than ||, so the previous unparenthesized
		// condition counted streams with MaxBytes <= 0 (e.g. -1 for
		// unlimited) whenever tier was empty, which overflows the uint64
		// conversion below into a huge reservation.
		if (tier == _EMPTY_ || tier == tierName(cfg)) && cfg.MaxBytes > 0 {
			switch cfg.Storage {
			case FileStorage:
				store += uint64(cfg.MaxBytes)
			case MemoryStorage:
				mem += uint64(cfg.MaxBytes)
			}
		}
	}
	return mem, store
}
// JetStreamUsage reports on JetStream usage and limits for an account.
func (a *Account) JetStreamUsage() JetStreamAccountStats {
a.mu.RLock()
@@ -1591,6 +1627,8 @@ func (a *Account) JetStreamUsage() JetStreamAccountStats {
if jsa != nil {
js := jsa.js
js.mu.RLock()
cc := js.cluster
singleServer := cc == nil
jsa.mu.RLock()
jsa.usageMu.RLock()
stats.Memory, stats.Store = jsa.storageTotals()
@@ -1599,6 +1637,11 @@ func (a *Account) JetStreamUsage() JetStreamAccountStats {
Total: jsa.apiTotal,
Errors: jsa.apiErrors,
}
if singleServer {
stats.ReservedMemory, stats.ReservedStore = jsa.reservedStorage(_EMPTY_)
} else {
stats.ReservedMemory, stats.ReservedStore = reservedStorage(cc.streams[aname], _EMPTY_)
}
l, defaultTier := jsa.limits[_EMPTY_]
if defaultTier {
stats.Limits = l
@@ -1611,27 +1654,42 @@ func (a *Account) JetStreamUsage() JetStreamAccountStats {
// In case this shows an empty stream, that tier will be added when iterating over streams
skipped++
} else {
stats.Tiers[t] = JetStreamTier{
tier := JetStreamTier{
Memory: uint64(total.total.mem),
Store: uint64(total.total.store),
Limits: jsa.limits[t],
}
if singleServer {
tier.ReservedMemory, tier.ReservedStore = jsa.reservedStorage(t)
} else {
tier.ReservedMemory, tier.ReservedStore = reservedStorage(cc.streams[aname], t)
}
stats.Tiers[t] = tier
}
}
if len(accJsLimits) != len(jsa.usage)-skipped {
// insert unused limits
for t, lim := range accJsLimits {
if _, ok := stats.Tiers[t]; !ok {
stats.Tiers[t] = JetStreamTier{Limits: lim}
tier := JetStreamTier{Limits: lim}
if singleServer {
tier.ReservedMemory, tier.ReservedStore = jsa.reservedStorage(t)
} else {
tier.ReservedMemory, tier.ReservedStore = reservedStorage(cc.streams[aname], t)
}
stats.Tiers[t] = tier
}
}
}
}
jsa.usageMu.RUnlock()
if cc := jsa.js.cluster; cc != nil {
// Clustered
if cc := js.cluster; cc != nil {
sas := cc.streams[aname]
if defaultTier {
stats.Streams = len(sas)
stats.ReservedMemory, stats.ReservedStore = reservedStorage(sas, _EMPTY_)
}
for _, sa := range sas {
stats.Consumers += len(sa.consumers)
@@ -2086,7 +2144,7 @@ func (jsa *jsAccount) storageTotals() (uint64, uint64) {
return mem, store
}
func (jsa *jsAccount) limitsExceeded(storeType StorageType, tierName string) (bool, *ApiError) {
func (jsa *jsAccount) limitsExceeded(storeType StorageType, tierName string, replicas int) (bool, *ApiError) {
jsa.usageMu.RLock()
defer jsa.usageMu.RUnlock()
@@ -2099,20 +2157,25 @@ func (jsa *jsAccount) limitsExceeded(storeType StorageType, tierName string) (bo
// Imply totals of 0
return false, nil
}
r := int64(replicas)
if r < 1 || tierName == _EMPTY_ {
r = 1
}
// Since tiers are flat we need to scale limit up by replicas when checking.
if storeType == MemoryStorage {
totalMem := inUse.total.mem
if selectedLimits.MemoryMaxStreamBytes > 0 && totalMem > selectedLimits.MemoryMaxStreamBytes {
if selectedLimits.MemoryMaxStreamBytes > 0 && totalMem > selectedLimits.MemoryMaxStreamBytes*r {
return true, nil
}
if selectedLimits.MaxMemory >= 0 && totalMem > selectedLimits.MaxMemory {
if selectedLimits.MaxMemory >= 0 && totalMem > selectedLimits.MaxMemory*r {
return true, nil
}
} else {
totalStore := inUse.total.store
if selectedLimits.StoreMaxStreamBytes > 0 && totalStore > selectedLimits.StoreMaxStreamBytes {
if selectedLimits.StoreMaxStreamBytes > 0 && totalStore > selectedLimits.StoreMaxStreamBytes*r {
return true, nil
}
if selectedLimits.MaxStore >= 0 && totalStore > selectedLimits.MaxStore {
if selectedLimits.MaxStore >= 0 && totalStore > selectedLimits.MaxStore*r {
return true, nil
}
}
@@ -2141,28 +2204,22 @@ func (js *jetStream) checkLimits(selected *JetStreamAccountLimits, config *Strea
}
// stream limit is checked separately on stream create only!
// Check storage, memory or disk.
return js.checkBytesLimits(selected, config.MaxBytes, config.Storage, config.Replicas, checkServer, currentRes, maxBytesOffset)
return js.checkBytesLimits(selected, config.MaxBytes, config.Storage, checkServer, currentRes, maxBytesOffset)
}
// Check if additional bytes will exceed our account limits and optionally the server itself.
// This should account for replicas.
// Read Lock should be held.
func (js *jetStream) checkBytesLimits(selectedLimits *JetStreamAccountLimits, addBytes int64, storage StorageType, replicas int, checkServer bool, currentRes, maxBytesOffset int64) error {
if replicas < 1 {
replicas = 1
}
func (js *jetStream) checkBytesLimits(selectedLimits *JetStreamAccountLimits, addBytes int64, storage StorageType, checkServer bool, currentRes, maxBytesOffset int64) error {
if addBytes < 0 {
addBytes = 1
}
totalBytes := (addBytes * int64(replicas)) + maxBytesOffset
totalBytes := addBytes + maxBytesOffset
switch storage {
case MemoryStorage:
// Account limits defined.
if selectedLimits.MaxMemory >= 0 {
if currentRes+totalBytes > selectedLimits.MaxMemory {
return NewJSMemoryResourcesExceededError()
}
if selectedLimits.MaxMemory >= 0 && currentRes+totalBytes > selectedLimits.MaxMemory {
return NewJSMemoryResourcesExceededError()
}
// Check if this server can handle request.
if checkServer && js.memReserved+addBytes > js.config.MaxMemory {
@@ -2170,10 +2227,8 @@ func (js *jetStream) checkBytesLimits(selectedLimits *JetStreamAccountLimits, ad
}
case FileStorage:
// Account limits defined.
if selectedLimits.MaxStore >= 0 {
if currentRes+totalBytes > selectedLimits.MaxStore {
return NewJSStorageResourcesExceededError()
}
if selectedLimits.MaxStore >= 0 && currentRes+totalBytes > selectedLimits.MaxStore {
return NewJSStorageResourcesExceededError()
}
// Check if this server can handle request.
if checkServer && js.storeReserved+addBytes > js.config.MaxStore {

View File

@@ -21,6 +21,7 @@ import (
"math/rand"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
@@ -217,6 +218,9 @@ const (
JSApiServerStreamCancelMove = "$JS.API.ACCOUNT.STREAM.CANCEL_MOVE.*.*"
JSApiServerStreamCancelMoveT = "$JS.API.ACCOUNT.STREAM.CANCEL_MOVE.%s.%s"
// The prefix for system level account API.
jsAPIAccountPre = "$JS.API.ACCOUNT."
// jsAckT is the template for the ack message stream coming back from a consumer
// when they ACK/NAK, etc a message.
jsAckT = "$JS.ACK.%s.%s"
@@ -346,6 +350,8 @@ type ApiResponse struct {
Error *ApiError `json:"error,omitempty"`
}
const JSApiSystemResponseType = "io.nats.jetstream.api.v1.system_response"
// When passing back to the clients generalize store failures.
var (
errStreamStoreFailed = errors.New("error creating store for stream")
@@ -738,26 +744,59 @@ type jsAPIRoutedReq struct {
}
func (js *jetStream) apiDispatch(sub *subscription, c *client, acc *Account, subject, reply string, rmsg []byte) {
// Ignore system level directives meta stepdown and peer remove requests here.
if subject == JSApiLeaderStepDown ||
subject == JSApiRemoveServer ||
strings.HasPrefix(subject, jsAPIAccountPre) {
return
}
// No lock needed, those are immutable.
s, rr := js.srv, js.apiSubs.Match(subject)
hdr, _ := c.msgParts(rmsg)
hdr, msg := c.msgParts(rmsg)
if len(getHeader(ClientInfoHdr, hdr)) == 0 {
// Check if this is the system account. We will let these through for the account info only.
if s.SystemAccount() != acc || subject != JSApiAccountInfo {
sacc := s.SystemAccount()
if sacc != acc {
return
}
if subject != JSApiAccountInfo {
// Only respond from the initial server entry to the NATS system.
if c.kind == CLIENT || c.kind == LEAF {
var resp = ApiResponse{
Type: JSApiSystemResponseType,
Error: NewJSNotEnabledForAccountError(),
}
s.sendAPIErrResponse(nil, acc, subject, reply, string(msg), s.jsonResponse(&resp))
}
return
}
}
// Shortcircuit.
// Short circuit for no interest.
if len(rr.psubs)+len(rr.qsubs) == 0 {
if (c.kind == CLIENT || c.kind == LEAF) && acc != s.SystemAccount() {
ci, acc, _, _, _ := s.getRequestInfo(c, rmsg)
var resp = ApiResponse{
Type: JSApiSystemResponseType,
Error: NewJSBadRequestError(),
}
s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp))
}
return
}
// We should only have psubs and only 1 per result.
// FIXME(dlc) - Should we respond here with NoResponders or error?
if len(rr.psubs) != 1 {
s.Warnf("Malformed JetStream API Request: [%s] %q", subject, rmsg)
if c.kind == CLIENT || c.kind == LEAF {
ci, acc, _, _, _ := s.getRequestInfo(c, rmsg)
var resp = ApiResponse{
Type: JSApiSystemResponseType,
Error: NewJSBadRequestError(),
}
s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp))
}
return
}
jsub := rr.psubs[0]
@@ -774,20 +813,30 @@ func (js *jetStream) apiDispatch(sub *subscription, c *client, acc *Account, sub
// If we are here we have received this request over a non-client connection.
// We need to make sure not to block. We will send the request to a long-lived
// go routine.
// pool of go routines.
// Increment inflight. Do this before queueing.
atomic.AddInt64(&js.apiInflight, 1)
// Copy the state. Note the JSAPI only uses the hdr index to piece apart the
// header from the msg body. No other references are needed.
s.jsAPIRoutedReqs.push(&jsAPIRoutedReq{jsub, sub, acc, subject, reply, copyBytes(rmsg), c.pa})
// Check pending and warn if getting backed up.
const warnThresh = 32
pending := s.jsAPIRoutedReqs.push(&jsAPIRoutedReq{jsub, sub, acc, subject, reply, copyBytes(rmsg), c.pa})
if pending > warnThresh {
s.RateLimitWarnf("JetStream request queue has high pending count: %d", pending)
}
}
func (s *Server) processJSAPIRoutedRequests() {
defer s.grWG.Done()
s.mu.Lock()
s.mu.RLock()
queue := s.jsAPIRoutedReqs
client := &client{srv: s, kind: JETSTREAM}
s.mu.Unlock()
s.mu.RUnlock()
js := s.getJetStream()
for {
select {
@@ -800,6 +849,7 @@ func (s *Server) processJSAPIRoutedRequests() {
if dur := time.Since(start); dur >= readLoopReportThreshold {
s.Warnf("Internal subscription on %q took too long: %v", r.subject, dur)
}
atomic.AddInt64(&js.apiInflight, -1)
}
queue.recycle(&reqs)
case <-s.quitCh:
@@ -816,8 +866,16 @@ func (s *Server) setJetStreamExportSubs() error {
// Start the go routine that will process API requests received by the
// subscription below when they are coming from routes, etc..
const maxProcs = 16
mp := runtime.GOMAXPROCS(0)
// Cap at 16 max for now on larger core setups.
if mp > maxProcs {
mp = maxProcs
}
s.jsAPIRoutedReqs = newIPQueue[*jsAPIRoutedReq](s, "Routed JS API Requests")
s.startGoRoutine(s.processJSAPIRoutedRequests)
for i := 0; i < mp; i++ {
s.startGoRoutine(s.processJSAPIRoutedRequests)
}
// This is the catch all now for all JetStream API calls.
if _, err := s.sysSubscribe(jsAllAPI, js.apiDispatch); err != nil {
@@ -3659,7 +3717,7 @@ func (s *Server) streamSnapshot(ci *ClientInfo, acc *Account, mset *stream, sr *
// We will place sequence number and size of chunk sent in the reply.
ackSubj := fmt.Sprintf(jsSnapshotAckT, mset.name(), nuid.Next())
ackSub, _ := mset.subscribeInternalUnlocked(ackSubj+".>", func(_ *subscription, _ *client, _ *Account, subject, _ string, _ []byte) {
ackSub, _ := mset.subscribeInternal(ackSubj+".>", func(_ *subscription, _ *client, _ *Account, subject, _ string, _ []byte) {
cs, _ := strconv.Atoi(tokenAt(subject, 6))
// This is very crude and simple, but ok for now.
// This only matters when sending multiple chunks.
@@ -3670,7 +3728,7 @@ func (s *Server) streamSnapshot(ci *ClientInfo, acc *Account, mset *stream, sr *
}
}
})
defer mset.unsubscribeUnlocked(ackSub)
defer mset.unsubscribe(ackSub)
// TODO(dlc) - Add in NATS-Chunked-Sequence header

View File

@@ -103,6 +103,8 @@ const (
removePendingRequest
// For sending compressed streams, either through RAFT or catchup.
compressedStreamMsgOp
// For sending deleted gaps on catchups for replicas.
deleteRangeOp
)
// raftGroups are controlled by the metagroup controller.
@@ -338,15 +340,16 @@ func (s *Server) JetStreamSnapshotStream(account, stream string) error {
return err
}
mset.mu.RLock()
if !mset.node.Leader() {
mset.mu.RUnlock()
return NewJSNotEnabledForAccountError()
// Hold lock when installing snapshot.
mset.mu.Lock()
if mset.node == nil {
mset.mu.Unlock()
return nil
}
n := mset.node
mset.mu.RUnlock()
err = mset.node.InstallSnapshot(mset.stateSnapshotLocked())
mset.mu.Unlock()
return n.InstallSnapshot(mset.stateSnapshot())
return err
}
func (s *Server) JetStreamClusterPeers() []string {
@@ -489,24 +492,24 @@ func (js *jetStream) restartStream(acc *Account, csa *streamAssignment) {
// isStreamHealthy will determine if the stream is up to date or very close.
// For R1 it will make sure the stream is present on this server.
func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool {
js.mu.Lock()
js.mu.RLock()
s, cc := js.srv, js.cluster
if cc == nil {
// Non-clustered mode
js.mu.Unlock()
js.mu.RUnlock()
return true
}
// Pull the group out.
rg := sa.Group
if rg == nil {
js.mu.Unlock()
js.mu.RUnlock()
return false
}
streamName := sa.Config.Name
node := rg.node
js.mu.Unlock()
js.mu.RUnlock()
// First lookup stream and make sure its there.
mset, err := acc.lookupStream(streamName)
@@ -515,6 +518,11 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) bool {
return false
}
// If we are catching up return false.
if mset.isCatchingUp() {
return false
}
if node == nil || node.Healthy() {
// Check if we are processing a snapshot and are catching up.
if !mset.isCatchingUp() {
@@ -1238,7 +1246,8 @@ func (js *jetStream) monitorCluster() {
lt := time.NewTicker(leaderCheckInterval)
defer lt.Stop()
const healthCheckInterval = 2 * time.Minute
// Check the general health once an hour.
const healthCheckInterval = 1 * time.Hour
ht := time.NewTicker(healthCheckInterval)
defer ht.Stop()
@@ -1454,18 +1463,22 @@ func (js *jetStream) clusterStreamConfig(accName, streamName string) (StreamConf
}
func (js *jetStream) metaSnapshot() []byte {
var streams []writeableStreamAssignment
js.mu.RLock()
cc := js.cluster
nsa := 0
for _, asa := range cc.streams {
nsa += len(asa)
}
streams := make([]writeableStreamAssignment, 0, nsa)
for _, asa := range cc.streams {
for _, sa := range asa {
wsa := writeableStreamAssignment{
Client: sa.Client,
Created: sa.Created,
Config: sa.Config,
Group: sa.Group,
Sync: sa.Sync,
Client: sa.Client,
Created: sa.Created,
Config: sa.Config,
Group: sa.Group,
Sync: sa.Sync,
Consumers: make([]*consumerAssignment, 0, len(sa.consumers)),
}
for _, ca := range sa.consumers {
wsa.Consumers = append(wsa.Consumers, ca)
@@ -1934,6 +1947,9 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
delete(ru.removeStreams, key)
} else {
js.processUpdateStreamAssignment(sa)
// Since an update can be lowering replica count, we want upper layer to treat
// similar to a removal and snapshot to collapse old entries.
didRemoveStream = true
}
default:
panic(fmt.Sprintf("JetStream Cluster Unknown meta entry op type: %v", entryOp(buf[0])))
@@ -2192,9 +2208,13 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
var lastState SimpleState
var lastSnapTime time.Time
// Don't allow the upper layer to install snapshots until we have
// fully recovered from disk.
isRecovering := true
// Should only to be called from leader.
doSnapshot := func() {
if mset == nil || isRestore || time.Since(lastSnapTime) < minSnapDelta {
if mset == nil || isRecovering || isRestore || time.Since(lastSnapTime) < minSnapDelta {
return
}
@@ -2221,7 +2241,6 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
// We will establish a restoreDoneCh no matter what. Will never be triggered unless
// we replace with the restore chan.
restoreDoneCh := make(<-chan error)
isRecovering := true
// For migration tracking.
var mmt *time.Ticker
@@ -2671,7 +2690,7 @@ func (mset *stream) isMigrating() bool {
func (mset *stream) resetClusteredState(err error) bool {
mset.mu.RLock()
s, js, jsa, sa, acc, node := mset.srv, mset.js, mset.jsa, mset.sa, mset.acc, mset.node
stype, isLeader, tierName := mset.cfg.Storage, mset.isLeader(), mset.tier
stype, isLeader, tierName, replicas := mset.cfg.Storage, mset.isLeader(), mset.tier, mset.cfg.Replicas
mset.mu.RUnlock()
// Stepdown regardless if we are the leader here.
@@ -2687,12 +2706,12 @@ func (mset *stream) resetClusteredState(err error) bool {
// Server
if js.limitsExceeded(stype) {
s.Debugf("Will not reset stream, server resources exceeded")
s.Warnf("Will not reset stream, server resources exceeded")
return false
}
// Account
if exceeded, _ := jsa.limitsExceeded(stype, tierName); exceeded {
if exceeded, _ := jsa.limitsExceeded(stype, tierName, replicas); exceeded {
s.Warnf("stream '%s > %s' errored, account resources exceeded", acc, mset.name())
return false
}
@@ -3603,20 +3622,23 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme
}
}
mset.setStreamAssignment(sa)
if err = mset.updateWithAdvisory(sa.Config, false); err != nil {
s.Warnf("JetStream cluster error updating stream %q for account %q: %v", sa.Config.Name, acc.Name, err)
if osa != nil {
// Process the raft group and make sure it's running if needed.
js.createRaftGroup(acc.GetName(), osa.Group, storage, pprofLabels{
"type": "stream",
"account": mset.accName(),
"stream": mset.name(),
})
mset.setStreamAssignment(osa)
}
if rg.node != nil {
rg.node.Delete()
rg.node = nil
// Check if our config has really been updated.
if !reflect.DeepEqual(mset.config(), sa.Config) {
if err = mset.updateWithAdvisory(sa.Config, false); err != nil {
s.Warnf("JetStream cluster error updating stream %q for account %q: %v", sa.Config.Name, acc.Name, err)
if osa != nil {
// Process the raft group and make sure it's running if needed.
js.createRaftGroup(acc.GetName(), osa.Group, storage, pprofLabels{
"type": "stream",
"account": mset.accName(),
"stream": mset.name(),
})
mset.setStreamAssignment(osa)
}
if rg.node != nil {
rg.node.Delete()
rg.node = nil
}
}
}
} else if err == NewJSStreamNotFoundError() {
@@ -4544,9 +4566,13 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) {
var lastSnap []byte
var lastSnapTime time.Time
// Don't allow the upper layer to install snapshots until we have
// fully recovered from disk.
recovering := true
doSnapshot := func(force bool) {
// Bail if trying too fast and not in a forced situation.
if !force && time.Since(lastSnapTime) < minSnapDelta {
if recovering || (!force && time.Since(lastSnapTime) < minSnapDelta) {
return
}
@@ -4597,7 +4623,6 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) {
// Track if we are leader.
var isLeader bool
recovering := true
for {
select {
@@ -5690,25 +5715,18 @@ func groupName(prefix string, peers []string, storage StorageType) string {
// returns stream count for this tier as well as applicable reservation size (not including reservations for cfg)
// jetStream read lock should be held
func tieredStreamAndReservationCount(asa map[string]*streamAssignment, tier string, cfg *StreamConfig) (int, int64) {
numStreams := len(asa)
reservation := int64(0)
if tier == _EMPTY_ {
for _, sa := range asa {
if sa.Config.MaxBytes > 0 && sa.Config.Name != cfg.Name {
if sa.Config.Storage == cfg.Storage {
reservation += (int64(sa.Config.Replicas) * sa.Config.MaxBytes)
}
}
}
} else {
numStreams = 0
for _, sa := range asa {
if isSameTier(sa.Config, cfg) {
numStreams++
if sa.Config.MaxBytes > 0 {
if sa.Config.Storage == cfg.Storage && sa.Config.Name != cfg.Name {
reservation += (int64(sa.Config.Replicas) * sa.Config.MaxBytes)
}
var numStreams int
var reservation int64
for _, sa := range asa {
if tier == _EMPTY_ || isSameTier(sa.Config, cfg) {
numStreams++
if sa.Config.MaxBytes > 0 && sa.Config.Storage == cfg.Storage && sa.Config.Name != cfg.Name {
// If tier is empty, all storage is flat and we should adjust for replicas.
// Otherwise if tiered, storage replication already taken into consideration.
if tier == _EMPTY_ && cfg.Replicas > 1 {
reservation += sa.Config.MaxBytes * int64(cfg.Replicas)
} else {
reservation += sa.Config.MaxBytes
}
}
}
@@ -5812,40 +5830,18 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject,
js.mu.Lock()
defer js.mu.Unlock()
// Capture if we have existing assignment first.
osa := js.streamAssignment(acc.Name, cfg.Name)
var areEqual bool
if osa != nil {
areEqual = reflect.DeepEqual(osa.Config, cfg)
}
var self *streamAssignment
var rg *raftGroup
// If this stream already exists, turn this into a stream info call.
if osa != nil {
// If they are the same then we will forward on as a stream info request.
// This now matches single server behavior.
if areEqual {
// This works when we have a stream leader. If we have no leader let the dupe
// go through as normal. We will handle properly on the other end.
// We must check interest at the $SYS account layer, not user account since import
// will always show interest.
sisubj := fmt.Sprintf(clusterStreamInfoT, acc.Name, cfg.Name)
if s.SystemAccount().Interest(sisubj) > 0 {
isubj := fmt.Sprintf(JSApiStreamInfoT, cfg.Name)
// We want to make sure we send along the client info.
cij, _ := json.Marshal(ci)
hdr := map[string]string{
ClientInfoHdr: string(cij),
JSResponseType: jsCreateResponse,
}
// Send this as system account, but include client info header.
s.sendInternalAccountMsgWithReply(nil, isubj, reply, hdr, nil, true)
return
}
} else {
// Capture if we have existing assignment first.
if osa := js.streamAssignment(acc.Name, cfg.Name); osa != nil {
if !reflect.DeepEqual(osa.Config, cfg) {
resp.Error = NewJSStreamNameExistError()
s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp))
return
}
// This is an equal assignment.
self, rg = osa, osa.Group
}
if cfg.Sealed {
@@ -5854,11 +5850,6 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject,
return
}
var self *streamAssignment
if osa != nil && areEqual {
self = osa
}
// Check for subject collisions here.
if cc.subjectsOverlap(acc.Name, cfg.Subjects, self) {
resp.Error = NewJSStreamSubjectOverlapError()
@@ -5875,10 +5866,7 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject,
}
// Raft group selection and placement.
var rg *raftGroup
if osa != nil && areEqual {
rg = osa.Group
} else {
if rg == nil {
// Check inflight before proposing in case we have an existing inflight proposal.
if cc.inflight == nil {
cc.inflight = make(map[string]map[string]*raftGroup)
@@ -5892,7 +5880,7 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject,
rg = existing
}
}
// Create a new one here.
// Create a new one here if needed.
if rg == nil {
nrg, err := js.createGroupForStream(ci, cfg)
if err != nil {
@@ -6860,6 +6848,22 @@ func decodeStreamAssignment(buf []byte) (*streamAssignment, error) {
return &sa, err
}
func encodeDeleteRange(dr *DeleteRange) []byte {
var bb bytes.Buffer
bb.WriteByte(byte(deleteRangeOp))
json.NewEncoder(&bb).Encode(dr)
return bb.Bytes()
}
func decodeDeleteRange(buf []byte) (*DeleteRange, error) {
var dr DeleteRange
err := json.Unmarshal(buf, &dr)
if err != nil {
return nil, err
}
return &dr, err
}
// createGroupForConsumer will create a new group from same peer set as the stream.
func (cc *jetStreamCluster) createGroupForConsumer(cfg *ConsumerConfig, sa *streamAssignment) *raftGroup {
if len(sa.Group.Peers) == 0 || cfg.Replicas > len(sa.Group.Peers) {
@@ -7433,7 +7437,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
mset.mu.RLock()
canRespond := !mset.cfg.NoAck && len(reply) > 0
name, stype, store := mset.cfg.Name, mset.cfg.Storage, mset.store
s, js, jsa, st, rf, tierName, outq, node := mset.srv, mset.js, mset.jsa, mset.cfg.Storage, mset.cfg.Replicas, mset.tier, mset.outq, mset.node
s, js, jsa, st, r, tierName, outq, node := mset.srv, mset.js, mset.jsa, mset.cfg.Storage, int64(mset.cfg.Replicas), mset.tier, mset.outq, mset.node
maxMsgSize, lseq, clfs := int(mset.cfg.MaxMsgSize), mset.lseq, mset.clfs
isLeader, isSealed := mset.isLeader(), mset.cfg.Sealed
mset.mu.RUnlock()
@@ -7491,16 +7495,20 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
t = &jsaStorage{}
jsa.usage[tierName] = t
}
if st == MemoryStorage {
total := t.total.store + int64(memStoreMsgSize(subject, hdr, msg)*uint64(rf))
if jsaLimits.MaxMemory > 0 && total > jsaLimits.MaxMemory {
exceeded = true
}
} else {
total := t.total.store + int64(fileStoreMsgSize(subject, hdr, msg)*uint64(rf))
if jsaLimits.MaxStore > 0 && total > jsaLimits.MaxStore {
exceeded = true
}
// Make sure replicas is correct.
if r < 1 {
r = 1
}
// This is for limits. If we have no tier, consider all to be flat, vs tiers like R3 where we want to scale limit by replication.
lr := r
if tierName == _EMPTY_ {
lr = 1
}
// Tiers are flat, meaning the limit for R3 will be 100GB, not 300GB, so compare to total but adjust limits.
if st == MemoryStorage && jsaLimits.MaxMemory > 0 {
exceeded = t.total.mem+(int64(memStoreMsgSize(subject, hdr, msg))*r) > (jsaLimits.MaxMemory * lr)
} else if jsaLimits.MaxStore > 0 {
exceeded = t.total.store+(int64(fileStoreMsgSize(subject, hdr, msg))*r) > (jsaLimits.MaxStore * lr)
}
jsa.usageMu.Unlock()
@@ -7626,9 +7634,10 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
// For requesting messages post raft snapshot to catch up streams post server restart.
// Any deleted msgs etc will be handled inline on catchup.
type streamSyncRequest struct {
Peer string `json:"peer,omitempty"`
FirstSeq uint64 `json:"first_seq"`
LastSeq uint64 `json:"last_seq"`
Peer string `json:"peer,omitempty"`
FirstSeq uint64 `json:"first_seq"`
LastSeq uint64 `json:"last_seq"`
DeleteRangesOk bool `json:"delete_ranges"`
}
// Given a stream state that represents a snapshot, calculate the sync request based on our current state.
@@ -7637,7 +7646,7 @@ func (mset *stream) calculateSyncRequest(state *StreamState, snap *StreamReplica
if state.LastSeq >= snap.LastSeq {
return nil
}
return &streamSyncRequest{FirstSeq: state.LastSeq + 1, LastSeq: snap.LastSeq, Peer: mset.node.ID()}
return &streamSyncRequest{FirstSeq: state.LastSeq + 1, LastSeq: snap.LastSeq, Peer: mset.node.ID(), DeleteRangesOk: true}
}
// processSnapshotDeletes will update our current store based on the snapshot
@@ -7647,14 +7656,22 @@ func (mset *stream) processSnapshotDeletes(snap *StreamReplicatedState) {
var state StreamState
mset.store.FastState(&state)
// Always adjust if FirstSeq has moved beyond our state.
var didReset bool
if snap.FirstSeq > state.FirstSeq {
mset.store.Compact(snap.FirstSeq)
mset.store.FastState(&state)
mset.lseq = state.LastSeq
mset.clearAllPreAcksBelowFloor(state.FirstSeq)
didReset = true
}
s := mset.srv
mset.mu.Unlock()
if didReset {
s.Warnf("Catchup for stream '%s > %s' resetting first sequence: %d on catchup request",
mset.account(), mset.name(), snap.FirstSeq)
}
if len(snap.Deleted) > 0 {
mset.store.SyncDeleted(snap.Deleted)
}
@@ -7684,6 +7701,22 @@ func (mset *stream) updateCatchupPeer(peer string) {
mset.mu.Unlock()
}
func (mset *stream) decrementCatchupPeer(peer string, num uint64) {
if peer == _EMPTY_ {
return
}
mset.mu.Lock()
if lag := mset.catchups[peer]; lag > 0 {
if lag >= num {
lag -= num
} else {
lag = 0
}
mset.catchups[peer] = lag
}
mset.mu.Unlock()
}
func (mset *stream) clearCatchupPeer(peer string) {
mset.mu.Lock()
if mset.catchups != nil {
@@ -7715,21 +7748,15 @@ func (mset *stream) hasCatchupPeers() bool {
}
func (mset *stream) setCatchingUp() {
mset.mu.Lock()
mset.catchup = true
mset.mu.Unlock()
mset.catchup.Store(true)
}
func (mset *stream) clearCatchingUp() {
mset.mu.Lock()
mset.catchup = false
mset.mu.Unlock()
mset.catchup.Store(false)
}
func (mset *stream) isCatchingUp() bool {
mset.mu.RLock()
defer mset.mu.RUnlock()
return mset.catchup
return mset.catchup.Load()
}
// Determine if a non-leader is current.
@@ -7738,7 +7765,7 @@ func (mset *stream) isCurrent() bool {
if mset.node == nil {
return true
}
return mset.node.Current() && !mset.catchup
return mset.node.Current() && !mset.catchup.Load()
}
// Maximum requests for the whole server that can be in flight at the same time.
@@ -7762,7 +7789,6 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) {
mset.store.FastState(&state)
mset.setCLFS(snap.Failed)
sreq := mset.calculateSyncRequest(&state, snap)
s, js, subject, n := mset.srv, mset.js, mset.sa.Sync, mset.node
qname := fmt.Sprintf("[ACC:%s] stream '%s' snapshot", mset.acc.Name, mset.cfg.Name)
mset.mu.Unlock()
@@ -7796,7 +7822,7 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) {
var sub *subscription
var err error
const activityInterval = 10 * time.Second
const activityInterval = 30 * time.Second
notActive := time.NewTimer(activityInterval)
defer notActive.Stop()
@@ -7832,36 +7858,12 @@ func (mset *stream) processSnapshot(snap *StreamReplicatedState) (e error) {
defer releaseSyncOutSem()
// Check our final state when we exit cleanly.
// If this snapshot was for messages no longer held by the leader we want to make sure
// we are synched for the next message sequence properly.
lastRequested := sreq.LastSeq
// This will make sure we have interest consumers updated.
checkFinalState := func() {
// Bail if no stream.
if mset == nil {
return
}
mset.mu.Lock()
var state StreamState
mset.store.FastState(&state)
var didReset bool
firstExpected := lastRequested + 1
if state.FirstSeq != firstExpected {
// Reset our notion of first.
mset.store.Compact(firstExpected)
mset.store.FastState(&state)
// Make sure last is also correct in case this also moved.
mset.lseq = state.LastSeq
mset.clearAllPreAcksBelowFloor(state.FirstSeq)
didReset = true
}
mset.mu.Unlock()
if didReset {
s.Warnf("Catchup for stream '%s > %s' resetting first sequence: %d on catchup complete",
mset.account(), mset.name(), firstExpected)
}
mset.mu.RLock()
consumers := make([]*consumer, 0, len(mset.consumers))
for _, o := range mset.consumers {
@@ -7907,7 +7909,7 @@ RETRY:
<-s.syncOutSem
releaseSem = true
// We may have been blocked for a bit, so the reset need to ensure that we
// We may have been blocked for a bit, so the reset needs to ensure that we
// consume the already fired timer.
if !notActive.Stop() {
select {
@@ -7927,8 +7929,6 @@ RETRY:
if sreq == nil {
return nil
}
// Reset notion of lastRequested
lastRequested = sreq.LastSeq
}
// Used to transfer message from the wire to another Go routine internally.
@@ -7951,22 +7951,20 @@ RETRY:
// Send our catchup request here.
reply := syncReplySubject()
sub, err = s.sysSubscribe(reply, func(_ *subscription, _ *client, _ *Account, _, reply string, msg []byte) {
// Make copies
// TODO(dlc) - Since we are using a buffer from the inbound client/route.
// Make copy since we are using a buffer from the inbound client/route.
msgsQ.push(&im{copyBytes(msg), reply})
})
if err != nil {
s.Errorf("Could not subscribe to stream catchup: %v", err)
goto RETRY
}
// Send our sync request.
b, _ := json.Marshal(sreq)
s.sendInternalMsgLocked(subject, reply, nil, b)
// Remember when we sent this out to avoimd loop spins on errors below.
// Remember when we sent this out to avoid loop spins on errors below.
reqSendTime := time.Now()
// Clear our sync request and capture last.
last := sreq.LastSeq
// Clear our sync request.
sreq = nil
// Run our own select loop here.
@@ -7976,24 +7974,18 @@ RETRY:
notActive.Reset(activityInterval)
mrecs := msgsQ.pop()
for _, mrec := range mrecs {
msg := mrec.msg
// Check for eof signaling.
if len(msg) == 0 {
msgsQ.recycle(&mrecs)
checkFinalState()
return nil
}
if lseq, err := mset.processCatchupMsg(msg); err == nil {
if _, err := mset.processCatchupMsg(msg); err == nil {
if mrec.reply != _EMPTY_ {
s.sendInternalMsgLocked(mrec.reply, _EMPTY_, nil, nil)
}
if lseq >= last {
msgsQ.recycle(&mrecs)
return nil
}
} else if isOutOfSpaceErr(err) {
notifyLeaderStopCatchup(mrec, err)
return err
@@ -8026,6 +8018,7 @@ RETRY:
goto RETRY
}
}
notActive.Reset(activityInterval)
msgsQ.recycle(&mrecs)
case <-notActive.C:
if mrecs := msgsQ.pop(); len(mrecs) > 0 {
@@ -8054,11 +8047,34 @@ func (mset *stream) processCatchupMsg(msg []byte) (uint64, error) {
return 0, errCatchupBadMsg
}
op := entryOp(msg[0])
if op != streamMsgOp && op != compressedStreamMsgOp {
if op != streamMsgOp && op != compressedStreamMsgOp && op != deleteRangeOp {
return 0, errCatchupBadMsg
}
mbuf := msg[1:]
if op == deleteRangeOp {
dr, err := decodeDeleteRange(mbuf)
if err != nil {
return 0, errCatchupBadMsg
}
// Handle the delete range.
// Make sure the sequences match up properly.
mset.mu.Lock()
if len(mset.preAcks) > 0 {
for seq := dr.First; seq < dr.First+dr.Num; seq++ {
mset.clearAllPreAcks(seq)
}
}
if err = mset.store.SkipMsgs(dr.First, dr.Num); err != nil {
mset.mu.Unlock()
return 0, errCatchupWrongSeqForSkip
}
mset.lseq = dr.First + dr.Num - 1
lseq := mset.lseq
mset.mu.Unlock()
return lseq, nil
}
if op == compressedStreamMsgOp {
var err error
mbuf, err = s2.Decode(nil, mbuf)
@@ -8076,6 +8092,7 @@ func (mset *stream) processCatchupMsg(msg []byte) (uint64, error) {
st := mset.cfg.Storage
ddloaded := mset.ddloaded
tierName := mset.tier
replicas := mset.cfg.Replicas
if mset.hasAllPreAcks(seq, subj) {
mset.clearAllPreAcks(seq)
@@ -8086,7 +8103,7 @@ func (mset *stream) processCatchupMsg(msg []byte) (uint64, error) {
if mset.js.limitsExceeded(st) {
return 0, NewJSInsufficientResourcesError()
} else if exceeded, apiErr := mset.jsa.limitsExceeded(st, tierName); apiErr != nil {
} else if exceeded, apiErr := mset.jsa.limitsExceeded(st, tierName, replicas); apiErr != nil {
return 0, apiErr
} else if exceeded {
return 0, NewJSInsufficientResourcesError()
@@ -8400,8 +8417,8 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
s := mset.srv
defer s.grWG.Done()
const maxOutBytes = int64(8 * 1024 * 1024) // 8MB for now, these are all internal, from server to server
const maxOutMsgs = int32(32 * 1024)
const maxOutBytes = int64(64 * 1024 * 1024) // 64MB for now, these are all internal, from server to server
const maxOutMsgs = int32(256 * 1024) // 256k in case we have lots of small messages or skip msgs.
outb := int64(0)
outm := int32(0)
@@ -8420,6 +8437,10 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
nextBatchC <- struct{}{}
remoteQuitCh := make(chan struct{})
const activityInterval = 30 * time.Second
notActive := time.NewTimer(activityInterval)
defer notActive.Stop()
// Setup ackReply for flow control.
ackReply := syncAckSubject()
ackSub, _ := s.sysSubscribe(ackReply, func(sub *subscription, c *client, _ *Account, subject, reply string, msg []byte) {
@@ -8437,16 +8458,14 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
// Kick ourselves and anyone else who might have stalled on global state.
select {
case nextBatchC <- struct{}{}:
// Reset our activity
notActive.Reset(activityInterval)
default:
}
})
defer s.sysUnsubscribe(ackSub)
ackReplyT := strings.ReplaceAll(ackReply, ".*", ".%d")
const activityInterval = 5 * time.Second
notActive := time.NewTimer(activityInterval)
defer notActive.Stop()
// Grab our state.
var state StreamState
mset.mu.RLock()
@@ -8469,10 +8488,9 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
compressOk := mset.compressAllowed()
var spb int
sendNextBatchAndContinue := func(qch chan struct{}) bool {
// Update our activity timer.
notActive.Reset(activityInterval)
const minWait = 5 * time.Second
sendNextBatchAndContinue := func(qch chan struct{}) bool {
// Check if we know we will not enter the loop because we are done.
if seq > last {
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
@@ -8481,15 +8499,23 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
return false
}
// If we already sent a batch, we will try to make sure we process around
// half the FC responses - or reach a certain amount of time - before sending
// the next batch.
// If we already sent a batch, we will try to make sure we can at least send a minimum
// batch before sending the next batch.
if spb > 0 {
mw := time.NewTimer(100 * time.Millisecond)
// Wait til we can send at least 4k
const minBatchWait = int32(4 * 1024)
mw := time.NewTimer(minWait)
for done := false; !done; {
select {
case <-nextBatchC:
done = int(atomic.LoadInt32(&outm)) <= spb/2
done = maxOutMsgs-atomic.LoadInt32(&outm) > minBatchWait
if !done {
// Wait for a small bit.
time.Sleep(50 * time.Millisecond)
} else {
// GC friendly.
mw.Stop()
}
case <-mw.C:
done = true
case <-s.quitCh:
@@ -8503,8 +8529,38 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
spb = 0
}
var smv StoreMsg
// Send an encoded msg.
sendEM := func(em []byte) {
// Place size in reply subject for flow control.
l := int64(len(em))
reply := fmt.Sprintf(ackReplyT, l)
s.gcbAdd(&outb, l)
atomic.AddInt32(&outm, 1)
s.sendInternalMsgLocked(sendSubject, reply, nil, em)
spb++
}
// If we support gap markers.
var dr DeleteRange
drOk := sreq.DeleteRangesOk
// Will send our delete range.
// Should already be checked for being valid.
sendDR := func() {
if dr.Num == 1 {
// Send like a normal skip msg.
sendEM(encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, dr.First, 0))
} else {
// We have a run, send a gap record. We send these without reply or tracking.
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, encodeDeleteRange(&dr))
// Clear out the pending for catchup.
mset.decrementCatchupPeer(sreq.Peer, dr.Num)
}
// Reset always.
dr.First, dr.Num = 0, 0
}
var smv StoreMsg
for ; seq <= last && atomic.LoadInt64(&outb) <= maxOutBytes && atomic.LoadInt32(&outm) <= maxOutMsgs && s.gcbBelowMax(); seq++ {
sm, err := mset.store.LoadMsg(seq, &smv)
// if this is not a deleted msg, bail out.
@@ -8530,22 +8586,33 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
s.Warnf("Error loading message for catchup '%s > %s': %v", mset.account(), mset.name(), err)
return false
}
var em []byte
if sm != nil {
em = encodeStreamMsgAllowCompress(sm.subj, _EMPTY_, sm.hdr, sm.msg, sm.seq, sm.ts, compressOk)
// If we allow gap markers check if we have one pending.
if drOk && dr.First > 0 {
sendDR()
}
// Send the normal message now.
sendEM(encodeStreamMsgAllowCompress(sm.subj, _EMPTY_, sm.hdr, sm.msg, sm.seq, sm.ts, compressOk))
} else {
// Skip record for deleted msg.
em = encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq, 0)
if drOk {
if dr.First == 0 {
dr.First, dr.Num = seq, 1
} else {
dr.Num++
}
} else {
// Skip record for deleted msg.
sendEM(encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq, 0))
}
}
// Place size in reply subject for flow control.
l := int64(len(em))
reply := fmt.Sprintf(ackReplyT, l)
s.gcbAdd(&outb, l)
atomic.AddInt32(&outm, 1)
s.sendInternalMsgLocked(sendSubject, reply, nil, em)
spb++
// Check if we are done.
if seq == last {
// Need to see if we have a pending delete range.
if drOk && dr.First > 0 {
sendDR()
}
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
// EOF
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, nil)
@@ -8557,10 +8624,14 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
default:
}
}
if drOk && dr.First > 0 {
sendDR()
}
return true
}
// Grab stream quit channel.
// Check is this stream got closed.
mset.mu.RLock()
qch := mset.qch
mset.mu.RUnlock()
@@ -8584,6 +8655,7 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
return
case <-notActive.C:
s.Warnf("Catchup for stream '%s > %s' stalled", mset.account(), mset.name())
mset.clearCatchupPeer(sreq.Peer)
return
case <-nextBatchC:
if !sendNextBatchAndContinue(qch) {

View File

@@ -1,4 +1,4 @@
// Copyright 2019-2023 The NATS Authors
// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -239,7 +239,7 @@ func validateLeafNode(o *Options) error {
}
} else {
if len(o.LeafNode.Users) != 0 {
return fmt.Errorf("operator mode does not allow specifying user in leafnode config")
return fmt.Errorf("operator mode does not allow specifying users in leafnode config")
}
for _, r := range o.LeafNode.Remotes {
if !nkeys.IsValidPublicAccountKey(r.LocalAccount) {
@@ -299,12 +299,12 @@ func validateLeafNode(o *Options) error {
// with gateways. So if an option validation needs to be done regardless,
// it MUST be done before this point!
if o.Gateway.Name == "" && o.Gateway.Port == 0 {
if o.Gateway.Name == _EMPTY_ && o.Gateway.Port == 0 {
return nil
}
// If we are here we have both leaf nodes and gateways defined, make sure there
// is a system account defined.
if o.SystemAccount == "" {
if o.SystemAccount == _EMPTY_ {
return fmt.Errorf("leaf nodes and gateways (both being defined) require a system account to also be configured")
}
if err := validatePinnedCerts(o.LeafNode.TLSPinnedCerts); err != nil {
@@ -334,6 +334,9 @@ func validateLeafNodeAuthOptions(o *Options) error {
if o.LeafNode.Username != _EMPTY_ {
return fmt.Errorf("can not have a single user/pass and a users array")
}
if o.LeafNode.Nkey != _EMPTY_ {
return fmt.Errorf("can not have a single nkey and a users array")
}
users := map[string]struct{}{}
for _, u := range o.LeafNode.Users {
if _, exists := users[u.Username]; exists {
@@ -830,6 +833,19 @@ func (c *client) sendLeafConnect(clusterName string, headers bool) error {
sig := base64.RawURLEncoding.EncodeToString(sigraw)
cinfo.JWT = bytesToString(tmp)
cinfo.Sig = sig
} else if nkey := c.leaf.remote.Nkey; nkey != _EMPTY_ {
kp, err := nkeys.FromSeed([]byte(nkey))
if err != nil {
c.Errorf("Remote nkey has malformed seed")
return err
}
// Wipe our key on exit.
defer kp.Wipe()
sigraw, _ := kp.Sign(c.nonce)
sig := base64.RawURLEncoding.EncodeToString(sigraw)
pkey, _ := kp.PublicKey()
cinfo.Nkey = pkey
cinfo.Sig = sig
} else if userInfo := c.leaf.remote.curURL.User; userInfo != nil {
cinfo.User = userInfo.Username()
cinfo.Pass, _ = userInfo.Password()
@@ -839,7 +855,7 @@ func (c *client) sendLeafConnect(clusterName string, headers bool) error {
}
b, err := json.Marshal(cinfo)
if err != nil {
c.Errorf("Error marshaling CONNECT to route: %v\n", err)
c.Errorf("Error marshaling CONNECT to remote leafnode: %v\n", err)
return err
}
// Although this call is made before the writeLoop is created,
@@ -1688,6 +1704,7 @@ func (s *Server) removeLeafNodeConnection(c *client) {
// Connect information for solicited leafnodes.
type leafConnectInfo struct {
Version string `json:"version,omitempty"`
Nkey string `json:"nkey,omitempty"`
JWT string `json:"jwt,omitempty"`
Sig string `json:"sig,omitempty"`
User string `json:"user,omitempty"`
@@ -2169,6 +2186,28 @@ func (c *client) forceAddToSmap(subj string) {
c.sendLeafNodeSubUpdate(subj, 1)
}
// Used to force remove a subject from the subject map.
func (c *client) forceRemoveFromSmap(subj string) {
c.mu.Lock()
defer c.mu.Unlock()
if c.leaf.smap == nil {
return
}
n := c.leaf.smap[subj]
if n == 0 {
return
}
n--
if n == 0 {
// Remove is now zero
delete(c.leaf.smap, subj)
c.sendLeafNodeSubUpdate(subj, 0)
} else {
c.leaf.smap[subj] = n
}
}
// Send the subscription interest change to the other side.
// Lock should be held.
func (c *client) sendLeafNodeSubUpdate(key string, n int32) {

View File

@@ -256,12 +256,42 @@ func (ms *memStore) SkipMsg() uint64 {
if ms.state.Msgs == 0 {
ms.state.FirstSeq = seq
ms.state.FirstTime = now
} else {
ms.dmap.Insert(seq)
}
ms.updateFirstSeq(seq)
ms.mu.Unlock()
return seq
}
// Skip multiple msgs.
func (ms *memStore) SkipMsgs(seq uint64, num uint64) error {
// Grab time.
now := time.Now().UTC()
ms.mu.Lock()
defer ms.mu.Unlock()
// Check sequence matches our last sequence.
if seq != ms.state.LastSeq+1 {
if seq > 0 {
return ErrSequenceMismatch
}
seq = ms.state.LastSeq + 1
}
lseq := seq + num - 1
ms.state.LastSeq = lseq
ms.state.LastTime = now
if ms.state.Msgs == 0 {
ms.state.FirstSeq, ms.state.FirstTime = lseq+1, now
} else {
for ; seq <= lseq; seq++ {
ms.dmap.Insert(seq)
}
}
return nil
}
// RegisterStorageUpdates registers a callback for updates to storage changes.
// It will present number of messages and bytes as a signed integer and an
// optional sequence number of the message if a single.
@@ -1058,9 +1088,6 @@ func (ms *memStore) updateFirstSeq(seq uint64) {
ms.dmap.Delete(seq)
}
}
if ms.dmap.IsEmpty() {
ms.dmap.SetInitialMin(ms.state.FirstSeq)
}
}
// Remove a seq from the fss and select new first.
@@ -1186,13 +1213,16 @@ func (ms *memStore) State() StreamState {
// Calculate interior delete details.
if numDeleted := int((state.LastSeq - state.FirstSeq + 1) - state.Msgs); numDeleted > 0 {
state.Deleted = make([]uint64, 0, state.NumDeleted)
// TODO(dlc) - Too Simplistic, once state is updated to allow runs etc redo.
for seq := state.FirstSeq + 1; seq < ms.state.LastSeq; seq++ {
if _, ok := ms.msgs[seq]; !ok {
state.Deleted = make([]uint64, 0, numDeleted)
fseq, lseq := state.FirstSeq, state.LastSeq
ms.dmap.Range(func(seq uint64) bool {
if seq < fseq || seq > lseq {
ms.dmap.Delete(seq)
} else {
state.Deleted = append(state.Deleted, seq)
}
}
return true
})
}
if len(state.Deleted) > 0 {
state.NumDeleted = len(state.Deleted)

View File

@@ -3148,17 +3148,17 @@ func (t HealthZErrorType) MarshalJSON() ([]byte, error) {
func (t *HealthZErrorType) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString("CONNECTION"):
case `"CONNECTION"`:
*t = HealthzErrorConn
case jsonString("BAD_REQUEST"):
case `"BAD_REQUEST"`:
*t = HealthzErrorBadRequest
case jsonString("JETSTREAM"):
case `"JETSTREAM"`:
*t = HealthzErrorJetStream
case jsonString("ACCOUNT"):
case `"ACCOUNT"`:
*t = HealthzErrorAccount
case jsonString("STREAM"):
case `"STREAM"`:
*t = HealthzErrorStream
case jsonString("CONSUMER"):
case `"CONSUMER"`:
*t = HealthzErrorConsumer
default:
return fmt.Errorf("unknown healthz error type %q", data)

View File

@@ -186,17 +186,18 @@ const (
mqttInitialPubHeader = 16 // An overkill, should need 7 bytes max
mqttProcessSubTooLong = 100 * time.Millisecond
mqttRetainedCacheTTL = 2 * time.Minute
mqttDefaultRetainedCacheTTL = 2 * time.Minute
mqttRetainedTransferTimeout = 10 * time.Second
)
var (
mqttPingResponse = []byte{mqttPacketPingResp, 0x0}
mqttProtoName = []byte("MQTT")
mqttOldProtoName = []byte("MQIsdp")
mqttSessJailDur = mqttSessFlappingJailDur
mqttFlapCleanItvl = mqttSessFlappingCleanupInterval
mqttJSAPITimeout = 4 * time.Second
mqttPingResponse = []byte{mqttPacketPingResp, 0x0}
mqttProtoName = []byte("MQTT")
mqttOldProtoName = []byte("MQIsdp")
mqttSessJailDur = mqttSessFlappingJailDur
mqttFlapCleanItvl = mqttSessFlappingCleanupInterval
mqttJSAPITimeout = 4 * time.Second
mqttRetainedCacheTTL = mqttDefaultRetainedCacheTTL
)
var (
@@ -217,7 +218,7 @@ var (
errMQTTEmptyUsername = errors.New("empty user name not allowed")
errMQTTTopicIsEmpty = errors.New("topic cannot be empty")
errMQTTPacketIdentifierIsZero = errors.New("packet identifier cannot be 0")
errMQTTUnsupportedCharacters = errors.New("characters ' ' and '.' not supported for MQTT topics")
errMQTTUnsupportedCharacters = errors.New("character ' ' not supported for MQTT topics")
errMQTTInvalidSession = errors.New("invalid MQTT session")
)
@@ -242,20 +243,25 @@ type mqttAccountSessionManager struct {
flapTimer *time.Timer // Timer to perform some cleanup of the flappers map
sl *Sublist // sublist allowing to find retained messages for given subscription
retmsgs map[string]*mqttRetainedMsgRef // retained messages
rmsCache sync.Map // map[string(subject)]mqttRetainedMsg
rmsCache sync.Map // map[subject]*mqttRetainedMsg
jsa mqttJSA
rrmLastSeq uint64 // Restore retained messages expected last sequence
rrmDoneCh chan struct{} // To notify the caller that all retained messages have been loaded
domainTk string // Domain (with trailing "."), or possibly empty. This is added to session subject.
}
// mqttJSAResponse carries one decoded JetStream API reply back to the
// code that issued the request through newRequestExMulti.
type mqttJSAResponse struct {
	reply string // will be used to map to the original request in jsa.NewRequestExMulti
	value any    // the decoded API response (e.g. *JSApiMsgGetResponse)
}
type mqttJSA struct {
mu sync.Mutex
id string
c *client
sendq *ipQueue[*mqttJSPubMsg]
rplyr string
replies sync.Map
replies sync.Map // [string]chan *mqttJSAResponse
nuid *nuid.NUID
quitCh chan struct{}
domain string // Domain or possibly empty. This is added to session subject.
@@ -1232,7 +1238,7 @@ func (s *Server) mqttCreateAccountSessionManager(acc *Account, quitCh chan struc
// Start the go routine that will clean up cached retained messages that expired.
s.startGoRoutine(func() {
defer s.grWG.Done()
as.cleaupRetainedMessageCache(s, closeCh)
as.cleanupRetainedMessageCache(s, closeCh)
})
lookupStream := func(stream, txt string) (*StreamInfo, error) {
@@ -1523,51 +1529,106 @@ func (jsa *mqttJSA) prefixDomain(subject string) string {
return subject
}
func (jsa *mqttJSA) newRequestEx(kind, subject, cidHash string, hdr int, msg []byte, timeout time.Duration) (interface{}, error) {
var sb strings.Builder
// Either we use nuid.Next() which uses a global lock, or our own nuid object, but
// then it needs to be "write" protected. This approach will reduce across account
// contention since we won't use the global nuid's lock.
jsa.mu.Lock()
uid := jsa.nuid.Next()
sb.WriteString(jsa.rplyr)
jsa.mu.Unlock()
// newRequestEx issues a single JS API request and returns the decoded
// reply value. It is a convenience wrapper over newRequestExMulti with
// one-element header/message slices.
func (jsa *mqttJSA) newRequestEx(kind, subject, cidHash string, hdr int, msg []byte, timeout time.Duration) (any, error) {
	rs, err := jsa.newRequestExMulti(kind, subject, cidHash, []int{hdr}, [][]byte{msg}, timeout)
	switch {
	case err != nil:
		return nil, err
	case len(rs) != 1:
		// Defensive: multi must return exactly one response for one message.
		return nil, fmt.Errorf("unreachable: invalid number of responses (%d)", len(rs))
	}
	return rs[0].value, nil
}
sb.WriteString(kind)
sb.WriteByte(btsep)
if cidHash != _EMPTY_ {
sb.WriteString(cidHash)
// newRequestExMulti sends multiple messages on the same subject and waits for
// all responses. It returns the same number of responses in the same order as
// msgs parameter. In case of a timeout it returns an error as well as all
// responses received as a sparsely populated array, matching msgs, with nils
// for the values that have not yet been received.
//
// Note that each response may represent an error and should be inspected as
// such by the caller.
func (jsa *mqttJSA) newRequestExMulti(kind, subject, cidHash string, hdrs []int, msgs [][]byte, timeout time.Duration) ([]*mqttJSAResponse, error) {
if len(hdrs) != len(msgs) {
return nil, fmt.Errorf("unreachable: invalid number of messages (%d) or header offsets (%d)", len(msgs), len(hdrs))
}
responseCh := make(chan *mqttJSAResponse, len(msgs))
// Generate and queue all outgoing requests, have all results reported to
// responseCh, and store a map of reply subjects to the original subjects'
// indices.
r2i := map[string]int{}
for i, msg := range msgs {
hdr := hdrs[i]
var sb strings.Builder
// Either we use nuid.Next() which uses a global lock, or our own nuid object, but
// then it needs to be "write" protected. This approach will reduce across account
// contention since we won't use the global nuid's lock.
jsa.mu.Lock()
uid := jsa.nuid.Next()
sb.WriteString(jsa.rplyr)
jsa.mu.Unlock()
sb.WriteString(kind)
sb.WriteByte(btsep)
if cidHash != _EMPTY_ {
sb.WriteString(cidHash)
sb.WriteByte(btsep)
}
sb.WriteString(uid)
reply := sb.String()
// Add responseCh to the reply channel map. It will be cleaned out on
// timeout (see below), or in processJSAPIReplies upon receiving the
// response.
jsa.replies.Store(reply, responseCh)
subject = jsa.prefixDomain(subject)
jsa.sendq.push(&mqttJSPubMsg{
subj: subject,
reply: reply,
hdr: hdr,
msg: msg,
})
r2i[reply] = i
}
sb.WriteString(uid)
reply := sb.String()
ch := make(chan interface{}, 1)
jsa.replies.Store(reply, ch)
subject = jsa.prefixDomain(subject)
jsa.sendq.push(&mqttJSPubMsg{
subj: subject,
reply: reply,
hdr: hdr,
msg: msg,
})
var i interface{}
// We don't want to use time.After() which causes memory growth because the timer
// can't be stopped and will need to expire to then be garbage collected.
// Wait for all responses to come back, or for the timeout to expire. We
// don't want to use time.After() which causes memory growth because the
// timer can't be stopped and will need to expire to then be garbage
// collected.
c := 0
responses := make([]*mqttJSAResponse, len(msgs))
start := time.Now()
t := time.NewTimer(timeout)
select {
case i = <-ch:
// Ensure we stop the timer so it can be quickly garbage collected.
t.Stop()
case <-jsa.quitCh:
return nil, ErrServerNotRunning
case <-t.C:
jsa.replies.Delete(reply)
return nil, fmt.Errorf("timeout for request type %q on %q (reply=%q)", kind, subject, reply)
defer t.Stop()
for {
select {
case r := <-responseCh:
i := r2i[r.reply]
responses[i] = r
c++
if c == len(msgs) {
return responses, nil
}
case <-jsa.quitCh:
return nil, ErrServerNotRunning
case <-t.C:
var reply string
now := time.Now()
for reply = range r2i { // preserve the last value for Errorf
jsa.replies.Delete(reply)
}
if len(msgs) == 1 {
return responses, fmt.Errorf("timeout after %v: request type %q on %q (reply=%q)", now.Sub(start), kind, subject, reply)
} else {
return responses, fmt.Errorf("timeout after %v: request type %q on %q: got %d out of %d", now.Sub(start), kind, subject, c, len(msgs))
}
}
}
return i, nil
}
func (jsa *mqttJSA) sendAck(ackSubject string) {
@@ -1667,6 +1728,30 @@ func (jsa *mqttJSA) loadLastMsgFor(streamName string, subject string) (*StoredMs
return lmr.Message, lmr.ToError()
}
// loadLastMsgForMulti fetches the last stored message for each subject in
// a single batched JS API round trip. The returned slice is index-aligned
// with subjects; entries are nil for requests that did not complete (e.g.
// timed out). Any error from the batched request is returned alongside the
// partial results, so callers can use what did arrive.
func (jsa *mqttJSA) loadLastMsgForMulti(streamName string, subjects []string) ([]*JSApiMsgGetResponse, error) {
	n := len(subjects)
	reqs := make([][]byte, n)
	hdrs := make([]int, n) // all zero: no header offsets for these requests
	for i, subj := range subjects {
		b, err := json.Marshal(&JSApiMsgGetRequest{LastFor: subj})
		if err != nil {
			return nil, err
		}
		reqs[i] = b
	}
	all, err := jsa.newRequestExMulti(mqttJSAMsgLoad, fmt.Sprintf(JSApiMsgGetT, streamName), _EMPTY_, hdrs, reqs, mqttJSAPITimeout)
	// all has the same order as subjects; keep that order as we unwrap.
	responses := make([]*JSApiMsgGetResponse, len(all))
	for i, r := range all {
		if r == nil {
			continue // this request timed out; leave a nil placeholder
		}
		responses[i] = r.value.(*JSApiMsgGetResponse)
	}
	return responses, err
}
func (jsa *mqttJSA) loadNextMsgFor(streamName string, subject string) (*StoredMsg, error) {
mreq := &JSApiMsgGetRequest{NextFor: subject}
req, err := json.Marshal(mreq)
@@ -1771,68 +1856,71 @@ func (as *mqttAccountSessionManager) processJSAPIReplies(_ *subscription, pc *cl
return
}
jsa.replies.Delete(subject)
ch := chi.(chan interface{})
ch := chi.(chan *mqttJSAResponse)
out := func(value any) {
ch <- &mqttJSAResponse{reply: subject, value: value}
}
switch token {
case mqttJSAStreamCreate:
var resp = &JSApiStreamCreateResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAStreamUpdate:
var resp = &JSApiStreamUpdateResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAStreamLookup:
var resp = &JSApiStreamInfoResponse{}
if err := json.Unmarshal(msg, &resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAStreamDel:
var resp = &JSApiStreamDeleteResponse{}
if err := json.Unmarshal(msg, &resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAConsumerCreate:
var resp = &JSApiConsumerCreateResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAConsumerDel:
var resp = &JSApiConsumerDeleteResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAMsgStore, mqttJSASessPersist:
var resp = &JSPubAckResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAMsgLoad:
var resp = &JSApiMsgGetResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
if err := json.Unmarshal(msg, &resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAStreamNames:
var resp = &JSApiStreamNamesResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
case mqttJSAMsgDelete:
var resp = &JSApiMsgDeleteResponse{}
if err := json.Unmarshal(msg, resp); err != nil {
resp.Error = NewJSInvalidJSONError()
}
ch <- resp
out(resp)
default:
pc.Warnf("Unknown reply code %q", token)
}
@@ -1856,8 +1944,8 @@ func (as *mqttAccountSessionManager) processRetainedMsg(_ *subscription, c *clie
// At this point we either recover from our own server, or process a remote retained message.
seq, _, _ := ackReplyInfo(reply)
// Handle this retained message
as.handleRetainedMsg(rm.Subject, &mqttRetainedMsgRef{sseq: seq}, rm)
// Handle this retained message, no need to copy the bytes.
as.handleRetainedMsg(rm.Subject, &mqttRetainedMsgRef{sseq: seq}, rm, false)
// If we were recovering (lastSeq > 0), then check if we are done.
if as.rrmLastSeq > 0 && seq >= as.rrmLastSeq {
@@ -2001,7 +2089,7 @@ func (as *mqttAccountSessionManager) createSubscription(subject string, cb msgHa
// only used when the server shutdown.
//
// No lock held on entry.
func (as *mqttAccountSessionManager) cleaupRetainedMessageCache(s *Server, closeCh chan struct{}) {
func (as *mqttAccountSessionManager) cleanupRetainedMessageCache(s *Server, closeCh chan struct{}) {
tt := time.NewTicker(mqttRetainedCacheTTL)
defer tt.Stop()
for {
@@ -2013,7 +2101,7 @@ func (as *mqttAccountSessionManager) cleaupRetainedMessageCache(s *Server, close
i, maxScan := 0, 10*1000
now := time.Now()
as.rmsCache.Range(func(key, value interface{}) bool {
rm := value.(mqttRetainedMsg)
rm := value.(*mqttRetainedMsg)
if now.After(rm.expiresFromCache) {
as.rmsCache.Delete(key)
}
@@ -2123,7 +2211,7 @@ func (as *mqttAccountSessionManager) sendJSAPIrequests(s *Server, c *client, acc
// If a message for this topic already existed, the existing record is updated
// with the provided information.
// Lock not held on entry.
func (as *mqttAccountSessionManager) handleRetainedMsg(key string, rf *mqttRetainedMsgRef, rm *mqttRetainedMsg) {
func (as *mqttAccountSessionManager) handleRetainedMsg(key string, rf *mqttRetainedMsgRef, rm *mqttRetainedMsg, copyBytesToCache bool) {
as.mu.Lock()
defer as.mu.Unlock()
if as.retmsgs == nil {
@@ -2151,11 +2239,7 @@ func (as *mqttAccountSessionManager) handleRetainedMsg(key string, rf *mqttRetai
// Update the in-memory retained message cache but only for messages
// that are already in the cache, i.e. have been (recently) used.
if rm != nil {
if _, ok := as.rmsCache.Load(key); ok {
toStore := *rm
toStore.expiresFromCache = time.Now().Add(mqttRetainedCacheTTL)
as.rmsCache.Store(key, toStore)
}
as.setCachedRetainedMsg(key, rm, true, copyBytesToCache)
}
return
}
@@ -2274,19 +2358,21 @@ func (as *mqttAccountSessionManager) removeSession(sess *mqttSession, lock bool)
// Session lock held on entry. Acquires the subs lock and holds it for
// the duration. Non-MQTT messages coming into mqttDeliverMsgCbQoS0 will be
// waiting.
// processQOS12Sub creates a QoS 1/2 subscription for the session. It is a
// thin wrapper over processSub with shadow-subscription scanning and
// retained-message serialization disabled (neither applies to QoS 1+
// JetStream-backed subscriptions).
func (sess *mqttSession) processQOS12Sub(
	c *client, // subscribing client.
	subject, sid []byte, isReserved bool, qos byte, jsDurName string, h msgHandler, // subscription parameters.
) (*subscription, error) {
	return sess.processSub(c, subject, sid, isReserved, qos, jsDurName, h, false, false, nil, false, nil)
}
func (sess *mqttSession) processSub(
// subscribing client.
c *client,
// subscription parameters.
subject, sid []byte, isReserved bool, qos byte, jsDurName string, h msgHandler,
// do we need to scan for shadow subscriptions? (we don't do it for QOS1+)
initShadow bool,
// len(rms) > 0 means to deliver retained messages for the subscription.
rms map[string]*mqttRetainedMsg,
// trace serialized retained messages in the log.
trace bool,
// the retained messages are kept in the account session manager.
as *mqttAccountSessionManager,
c *client, // subscribing client.
subject, sid []byte, isReserved bool, qos byte, jsDurName string, h msgHandler, // subscription parameters.
initShadow bool, // do we need to scan for shadow subscriptions? (not for QOS1+)
serializeRMS bool, // do we need to serialize RMS?
rms map[string]*mqttRetainedMsg, // preloaded rms (can be empty, or missing items if errors)
trace bool, // trace serialized retained messages in the log?
as *mqttAccountSessionManager, // needed only for rms serialization.
) (*subscription, error) {
start := time.Now()
defer func() {
@@ -2350,6 +2436,10 @@ func (sess *mqttSession) processSub(
func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
filters []*mqttFilter, fromSubProto, trace bool) ([]*subscription, error) {
c.mu.Lock()
acc := c.acc
c.mu.Unlock()
// Helper to determine if we need to create a separate top-level
// subscription for a wildcard.
fwc := func(subject string) (bool, string, string) {
@@ -2364,29 +2454,7 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
return true, fwcsubject, fwcsid
}
// Cache and a helper to load retained messages for a given subject.
rms := make(map[string]*mqttRetainedMsg)
loadRMS := func(subject []byte) error {
sub := &subscription{
client: c,
subject: subject,
sid: subject,
}
c.mu.Lock()
acc := c.acc
c.mu.Unlock()
if err := c.addShadowSubscriptions(acc, sub, false); err != nil {
return err
}
// Best-effort loading the messages, logs on errors (to c.srv), loads
// once for subject.
as.loadRetainedMessagesForSubject(rms, subject, c.srv)
for _, ss := range sub.shadow {
as.loadRetainedMessagesForSubject(rms, ss.subject, c.srv)
}
return nil
}
rmSubjects := map[string]struct{}{}
// Preload retained messages for all requested subscriptions. Also, since
// it's the first iteration over the filter list, do some cleanup.
for _, f := range filters {
@@ -2416,14 +2484,33 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
}
}
// Load retained messages.
// Find retained messages.
if fromSubProto {
if err := loadRMS([]byte(f.filter)); err != nil {
addRMSubjects := func(subject string) error {
sub := &subscription{
client: c,
subject: []byte(subject),
sid: []byte(subject),
}
if err := c.addShadowSubscriptions(acc, sub, false); err != nil {
return err
}
for _, sub := range append([]*subscription{sub}, sub.shadow...) {
as.addRetainedSubjectsForSubject(rmSubjects, bytesToString(sub.subject))
for _, ss := range sub.shadow {
as.addRetainedSubjectsForSubject(rmSubjects, bytesToString(ss.subject))
}
}
return nil
}
if err := addRMSubjects(f.filter); err != nil {
f.qos = mqttSubAckFailure
continue
}
if need, subject, _ := fwc(f.filter); need {
if err := loadRMS([]byte(subject)); err != nil {
if err := addRMSubjects(subject); err != nil {
f.qos = mqttSubAckFailure
continue
}
@@ -2431,6 +2518,14 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
}
}
serializeRMS := len(rmSubjects) > 0
var rms map[string]*mqttRetainedMsg
if serializeRMS {
// Make the best effort to load retained messages. We will identify
// errors in the next pass.
rms = as.loadRetainedMessages(rmSubjects, c)
}
// Small helper to add the consumer config to the session.
addJSConsToSess := func(sid string, cc *ConsumerConfig) {
if cc == nil {
@@ -2445,7 +2540,6 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
var err error
subs := make([]*subscription, 0, len(filters))
for _, f := range filters {
// Skip what's already been identified as a failure.
if f.qos == mqttSubAckFailure {
continue
@@ -2472,7 +2566,7 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
bsubject, bsid, isReserved, f.qos, // main subject
_EMPTY_, mqttDeliverMsgCbQoS0, // no jsDur for QOS0
processShadowSubs,
rms, trace, as) // rms is empty if not fromSubProto
serializeRMS, rms, trace, as)
sess.mu.Unlock()
as.mu.Unlock()
@@ -2506,7 +2600,7 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
[]byte(fwcsubject), []byte(fwcsid), isReserved, f.qos, // FWC (top-level wildcard) subject
_EMPTY_, mqttDeliverMsgCbQoS0, // no jsDur for QOS0
processShadowSubs,
rms, trace, as) // rms is empty if not fromSubProto
serializeRMS, rms, trace, as)
sess.mu.Unlock()
as.mu.Unlock()
if err != nil {
@@ -2532,6 +2626,7 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
subs = append(subs, sub)
addJSConsToSess(sid, jscons)
}
if fromSubProto {
err = sess.update(filters, true)
}
@@ -2548,20 +2643,21 @@ func (as *mqttAccountSessionManager) processSubs(sess *mqttSession, c *client,
// Runs from the client's readLoop.
// Account session manager lock held on entry.
// Session lock held on entry.
func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(rms map[string]*mqttRetainedMsg, sess *mqttSession, c *client, sub *subscription, trace bool) {
func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(rms map[string]*mqttRetainedMsg, sess *mqttSession, c *client, sub *subscription, trace bool) error {
if len(as.retmsgs) == 0 || len(rms) == 0 {
return
return nil
}
result := as.sl.ReverseMatch(string(sub.subject))
if len(result.psubs) == 0 {
return
return nil
}
toTrace := []mqttPublish{}
for _, psub := range result.psubs {
rm := rms[string(psub.subject)]
if rm == nil {
// This should not happen since we pre-load messages into the cache
// before calling serialize.
// This should not happen since we pre-load messages into rms before
// calling serialize.
continue
}
var pi uint16
@@ -2591,65 +2687,90 @@ func (as *mqttAccountSessionManager) serializeRetainedMsgsForSub(rms map[string]
sub.mqtt.prm = append(sub.mqtt.prm, headerBytes, rm.Msg)
c.mu.Unlock()
if trace {
pp := mqttPublish{
toTrace = append(toTrace, mqttPublish{
topic: []byte(rm.Topic),
flags: flags,
pi: pi,
sz: len(rm.Msg),
}
c.traceOutOp("PUBLISH", []byte(mqttPubTrace(&pp)))
})
}
}
for _, pp := range toTrace {
c.traceOutOp("PUBLISH", []byte(mqttPubTrace(&pp)))
}
return nil
}
// Returns in the provided slice all publish retained message records that
// Appends the stored message subjects for all retained message records that
// match the given subscription's `subject` (which could have wildcards).
//
// Account session manager NOT lock held on entry.
func (as *mqttAccountSessionManager) loadRetainedMessagesForSubject(rms map[string]*mqttRetainedMsg, topSubject []byte, log Logger) {
func (as *mqttAccountSessionManager) addRetainedSubjectsForSubject(list map[string]struct{}, topSubject string) bool {
as.mu.RLock()
if len(as.retmsgs) == 0 {
as.mu.RUnlock()
return
return false
}
result := as.sl.ReverseMatch(string(topSubject))
result := as.sl.ReverseMatch(topSubject)
as.mu.RUnlock()
if len(result.psubs) == 0 {
return
}
added := false
for _, sub := range result.psubs {
subject := string(sub.subject)
if rms[subject] != nil {
continue // already loaded
}
// See if we have the retained message in the cache.
if rmv, _ := as.rmsCache.Load(subject); rmv != nil {
rm := rmv.(mqttRetainedMsg)
rms[subject] = &rm
if _, ok := list[subject]; ok {
continue
}
list[subject] = struct{}{}
added = true
}
// Load the retained message from the stream, and cache it for reuse in
// the near future.
loadSubject := mqttRetainedMsgsStreamSubject + subject
jsm, err := as.jsa.loadLastMsgFor(mqttRetainedMsgsStreamName, loadSubject)
if err != nil || jsm == nil {
log.Warnf("failed to load retained message for subject %q: %v", loadSubject, err)
return added
}
// warner is the minimal logging interface needed to report non-fatal
// problems while loading retained messages.
type warner interface {
	Warnf(format string, v ...any)
}
// Loads a list of retained messages given a list of stored message subjects.
func (as *mqttAccountSessionManager) loadRetainedMessages(subjects map[string]struct{}, w warner) map[string]*mqttRetainedMsg {
rms := make(map[string]*mqttRetainedMsg, len(subjects))
ss := []string{}
for s := range subjects {
if rm := as.getCachedRetainedMsg(s); rm != nil {
rms[s] = rm
} else {
ss = append(ss, mqttRetainedMsgsStreamSubject+s)
}
}
if len(ss) == 0 {
return rms
}
results, err := as.jsa.loadLastMsgForMulti(mqttRetainedMsgsStreamName, ss)
// If an error occurred, warn, but then proceed with what we got.
if err != nil {
w.Warnf("error loading retained messages: %v", err)
}
for i, result := range results {
if result == nil {
continue // skip requests that timed out
}
if result.ToError() != nil {
w.Warnf("failed to load retained message for subject %q: %v", ss[i], err)
continue
}
var rm mqttRetainedMsg
if err := json.Unmarshal(jsm.Data, &rm); err != nil {
log.Warnf("failed to decode retained message for subject %q: %v", loadSubject, err)
if err := json.Unmarshal(result.Message.Data, &rm); err != nil {
w.Warnf("failed to decode retained message for subject %q: %v", ss[i], err)
continue
}
// Add the loaded retained message to the cache.
rm.expiresFromCache = time.Now().Add(mqttRetainedCacheTTL)
as.rmsCache.Store(subject, rm)
rms[subject] = &rm
// Add the loaded retained message to the cache, and to the results map.
key := ss[i][len(mqttRetainedMsgsStreamSubject):]
as.setCachedRetainedMsg(key, &rm, false, false)
rms[key] = &rm
}
return rms
}
// Creates the session stream (limit msgs of 1) for this client ID if it does
@@ -2833,6 +2954,32 @@ func (as *mqttAccountSessionManager) transferRetainedToPerKeySubjectStream(log *
return nil
}
// getCachedRetainedMsg returns the cached retained message for subject, or
// nil when the subject is absent from the cache or its entry has expired.
// Expired entries are evicted lazily, as a side effect of the lookup.
func (as *mqttAccountSessionManager) getCachedRetainedMsg(subject string) *mqttRetainedMsg {
	cached, found := as.rmsCache.Load(subject)
	if !found {
		return nil
	}
	rm := cached.(*mqttRetainedMsg)
	if time.Now().After(rm.expiresFromCache) {
		// Stale: drop it and report a miss.
		as.rmsCache.Delete(subject)
		return nil
	}
	return rm
}
// setCachedRetainedMsg stores rm in the retained-message cache under
// subject and stamps its cache expiration. When onlyReplace is true, the
// entry is written only if the subject is already cached (i.e. refresh,
// don't populate). When copyBytesToCache is true, the payload is copied so
// the cached message does not alias the caller's buffer.
//
// Note: rm.expiresFromCache is updated even when onlyReplace causes the
// store to be skipped, matching the original behavior.
func (as *mqttAccountSessionManager) setCachedRetainedMsg(subject string, rm *mqttRetainedMsg, onlyReplace bool, copyBytesToCache bool) {
	rm.expiresFromCache = time.Now().Add(mqttRetainedCacheTTL)
	if _, cached := as.rmsCache.Load(subject); onlyReplace && !cached {
		return
	}
	if copyBytesToCache {
		rm.Msg = copyBytes(rm.Msg)
	}
	as.rmsCache.Store(subject, rm)
}
//////////////////////////////////////////////////////////////////////////////
//
// MQTT session related functions
@@ -4026,7 +4173,7 @@ func (c *client) mqttHandlePubRetain() {
sseq: smr.Sequence,
}
// Add/update the map
asm.handleRetainedMsg(key, rf, rm)
asm.handleRetainedMsg(key, rf, rm, true) // will copy the payload bytes if needs to update rmsCache
} else {
c.mu.Lock()
acc := c.acc
@@ -4893,9 +5040,8 @@ func (sess *mqttSession) processJSConsumer(c *client, subject, sid string,
// for the JS durable's deliver subject.
sess.mu.Lock()
sess.tmaxack = tmaxack
sub, err := sess.processSub(c, []byte(inbox), []byte(inbox),
isMQTTReservedSubscription(subject), qos, cc.Durable, mqttDeliverMsgCbQoS12,
false, nil, false, nil) // no shadow subs, no retained message delivery
sub, err := sess.processQOS12Sub(c, []byte(inbox), []byte(inbox),
isMQTTReservedSubscription(subject), qos, cc.Durable, mqttDeliverMsgCbQoS12)
sess.mu.Unlock()
if err != nil {

View File

@@ -147,6 +147,7 @@ type LeafNodeOpts struct {
Port int `json:"port,omitempty"`
Username string `json:"-"`
Password string `json:"-"`
Nkey string `json:"-"`
Account string `json:"-"`
Users []*User `json:"-"`
AuthTimeout float64 `json:"auth_timeout,omitempty"`
@@ -192,6 +193,7 @@ type RemoteLeafOpts struct {
NoRandomize bool `json:"-"`
URLs []*url.URL `json:"urls,omitempty"`
Credentials string `json:"-"`
Nkey string `json:"-"`
SignatureCB SignatureHandler `json:"-"`
TLS bool `json:"-"`
TLSConfig *tls.Config `json:"-"`
@@ -638,6 +640,7 @@ type authorization struct {
user string
pass string
token string
nkey string
acc string
// Multiple Nkeys/Users
nkeys []*NkeyUser
@@ -669,6 +672,13 @@ type TLSConfigOpts struct {
CertMatchBy certstore.MatchByType
CertMatch string
OCSPPeerConfig *certidp.OCSPPeerConfig
Certificates []*TLSCertPairOpt
}
// TLSCertPairOpt holds the file paths to one certificate/private-key pair,
// as configured under the "certs"/"certificates" TLS option.
type TLSCertPairOpt struct {
	CertFile string // path to the certificate file ("cert_file")
	KeyFile  string // path to the private key file ("key_file")
}
// OCSPConfig represents the options of OCSP stapling options.
@@ -2259,6 +2269,7 @@ func parseLeafNodes(v interface{}, opts *Options, errors *[]error, warnings *[]e
opts.LeafNode.AuthTimeout = auth.timeout
opts.LeafNode.Account = auth.acc
opts.LeafNode.Users = auth.users
opts.LeafNode.Nkey = auth.nkey
// Validate user info config for leafnode authorization
if err := validateLeafNodeAuthOptions(opts); err != nil {
*errors = append(*errors, &configErr{tk, err.Error()})
@@ -2344,6 +2355,12 @@ func parseLeafAuthorization(v interface{}, errors *[]error, warnings *[]error) (
auth.user = mv.(string)
case "pass", "password":
auth.pass = mv.(string)
case "nkey":
nk := mv.(string)
if !nkeys.IsValidPublicUserKey(nk) {
*errors = append(*errors, &configErr{tk, "Not a valid public nkey for leafnode authorization"})
}
auth.nkey = nk
case "timeout":
at := float64(1)
switch mv := mv.(type) {
@@ -2489,7 +2506,24 @@ func parseRemoteLeafNodes(v interface{}, errors *[]error, warnings *[]error) ([]
*errors = append(*errors, &configErr{tk, err.Error()})
continue
}
// Can't have both creds and nkey
if remote.Nkey != _EMPTY_ {
*errors = append(*errors, &configErr{tk, "Remote leafnode can not have both creds and nkey defined"})
continue
}
remote.Credentials = p
case "nkey", "seed":
nk := v.(string)
if pb, _, err := nkeys.DecodeSeed([]byte(nk)); err != nil || pb != nkeys.PrefixByteUser {
err := &configErr{tk, fmt.Sprintf("Remote leafnode nkey is not a valid seed: %q", v)}
*errors = append(*errors, err)
continue
}
if remote.Credentials != _EMPTY_ {
*errors = append(*errors, &configErr{tk, "Remote leafnode can not have both creds and nkey defined"})
continue
}
remote.Nkey = nk
case "tls":
tc, err := parseTLS(tk, true)
if err != nil {
@@ -4180,7 +4214,7 @@ func parseTLS(v interface{}, isClientCtx bool) (t *TLSConfigOpts, retErr error)
)
defer convertPanicToError(&lt, &retErr)
_, v = unwrapValue(v, &lt)
tk, v := unwrapValue(v, &lt)
tlsm = v.(map[string]interface{})
for mk, mv := range tlsm {
tk, mv := unwrapValue(mv, &lt)
@@ -4381,10 +4415,46 @@ func parseTLS(v interface{}, isClientCtx bool) (t *TLSConfigOpts, retErr error)
default:
return nil, &configErr{tk, fmt.Sprintf("error parsing ocsp peer config: unsupported type %T", v)}
}
case "certs", "certificates":
certs, ok := mv.([]interface{})
if !ok {
return nil, &configErr{tk, fmt.Sprintf("error parsing certificates config: unsupported type %T", v)}
}
tc.Certificates = make([]*TLSCertPairOpt, len(certs))
for i, v := range certs {
tk, vv := unwrapValue(v, &lt)
pair, ok := vv.(map[string]interface{})
if !ok {
return nil, &configErr{tk, fmt.Sprintf("error parsing certificates config: unsupported type %T", vv)}
}
certPair := &TLSCertPairOpt{}
for k, v := range pair {
tk, vv = unwrapValue(v, &lt)
file, ok := vv.(string)
if !ok {
return nil, &configErr{tk, fmt.Sprintf("error parsing certificates config: unsupported type %T", vv)}
}
switch k {
case "cert_file":
certPair.CertFile = file
case "key_file":
certPair.KeyFile = file
default:
return nil, &configErr{tk, fmt.Sprintf("error parsing tls certs config, unknown field %q", k)}
}
}
if certPair.CertFile == _EMPTY_ || certPair.KeyFile == _EMPTY_ {
return nil, &configErr{tk, "error parsing certificates config: both 'cert_file' and 'cert_key' options are required"}
}
tc.Certificates[i] = certPair
}
default:
return nil, &configErr{tk, fmt.Sprintf("error parsing tls config, unknown field [%q]", mk)}
return nil, &configErr{tk, fmt.Sprintf("error parsing tls config, unknown field %q", mk)}
}
}
if len(tc.Certificates) > 0 && tc.CertFile != _EMPTY_ {
return nil, &configErr{tk, "error parsing tls config, cannot combine 'cert_file' option with 'certs' option"}
}
// If cipher suites were not specified then use the defaults
if tc.Ciphers == nil {
@@ -4696,6 +4766,20 @@ func GenTLSConfig(tc *TLSConfigOpts) (*tls.Config, error) {
if err != nil {
return nil, err
}
case tc.Certificates != nil:
// Multiple certificate support.
config.Certificates = make([]tls.Certificate, len(tc.Certificates))
for i, certPair := range tc.Certificates {
cert, err := tls.LoadX509KeyPair(certPair.CertFile, certPair.KeyFile)
if err != nil {
return nil, fmt.Errorf("error parsing X509 certificate/key pair %d/%d: %v", i+1, len(tc.Certificates), err)
}
cert.Leaf, err = x509.ParseCertificate(cert.Certificate[0])
if err != nil {
return nil, fmt.Errorf("error parsing certificate %d/%d: %v", i+1, len(tc.Certificates), err)
}
config.Certificates[i] = cert
}
}
// Require client certificates as needed

View File

@@ -25,11 +25,14 @@ import (
"net"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/nats-io/nats-server/v2/internal/fastrand"
"github.com/minio/highwayhash"
)
@@ -201,8 +204,6 @@ type raft struct {
stepdown *ipQueue[string] // Stepdown requests
leadc chan bool // Leader changes
quit chan struct{} // Raft group shutdown
prand *rand.Rand // Random generator, used to generate inboxes for instance
}
// cacthupState structure that holds our subscription, and catchup term and index
@@ -348,9 +349,9 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
sq := s.sys.sq
sacc := s.sys.account
hash := s.sys.shash
pub := s.info.ID
s.mu.RUnlock()
// Do this here to process error quicker.
ps, err := readPeerState(cfg.Store)
if err != nil {
return nil, err
@@ -360,12 +361,6 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
}
qpfx := fmt.Sprintf("[ACC:%s] RAFT '%s' ", accName, cfg.Name)
rsrc := time.Now().UnixNano()
if len(pub) >= 32 {
if h, _ := highwayhash.New64([]byte(pub[:32])); h != nil {
rsrc += int64(h.Sum64())
}
}
n := &raft{
created: time.Now(),
id: hash[:idLen],
@@ -397,7 +392,6 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
leadc: make(chan bool, 1),
observer: cfg.Observer,
extSt: ps.domainExt,
prand: rand.New(rand.NewSource(rsrc)),
}
n.c.registerWithAccount(sacc)
@@ -430,13 +424,19 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
n.setupLastSnapshot()
}
truncateAndErr := func(index uint64) {
if err := n.wal.Truncate(index); err != nil {
n.setWriteErr(err)
}
}
// Retrieve the stream state from the WAL. If there are pending append
// entries that were committed but not applied before we last shut down,
// we will try to replay them and process them here.
var state StreamState
n.wal.FastState(&state)
if state.Msgs > 0 {
// TODO(dlc) - Recover our state here.
n.debug("Replaying state of %d entries", state.Msgs)
if first, err := n.loadFirstEntry(); err == nil {
n.pterm, n.pindex = first.pterm, first.pindex
if first.commit > 0 && first.commit > n.commit {
@@ -444,31 +444,36 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
}
}
// This process will queue up entries on our applied queue but prior to the upper
// state machine running. So we will monitor how much we have queued and if we
// reach a limit will pause the apply queue and resume inside of run() go routine.
const maxQsz = 32 * 1024 * 1024 // 32MB max
// It looks like there are entries we have committed but not applied
// yet. Replay them.
for index := state.FirstSeq; index <= state.LastSeq; index++ {
for index, qsz := state.FirstSeq, 0; index <= state.LastSeq; index++ {
ae, err := n.loadEntry(index)
if err != nil {
n.warn("Could not load %d from WAL [%+v]: %v", index, state, err)
if err := n.wal.Truncate(index); err != nil {
n.setWriteErrLocked(err)
}
truncateAndErr(index)
break
}
if ae.pindex != index-1 {
n.warn("Corrupt WAL, will truncate")
if err := n.wal.Truncate(index); err != nil {
n.setWriteErrLocked(err)
}
truncateAndErr(index)
break
}
n.processAppendEntry(ae, nil)
// Check how much we have queued up so far to determine if we should pause.
for _, e := range ae.entries {
qsz += len(e.Data)
if qsz > maxQsz && !n.paused {
n.PauseApply()
}
}
}
}
// Send nil entry to signal the upper layers we are done doing replay/restore.
n.apply.push(nil)
// Make sure to track ourselves.
n.peers[n.id] = &lps{time.Now().UnixNano(), 0, true}
@@ -510,8 +515,9 @@ func (s *Server) startRaftNode(accName string, cfg *RaftConfig, labels pprofLabe
labels["group"] = n.group
s.registerRaftNode(n.group, n)
// Start the goroutines for the Raft state machine and the file writer.
// Start the run goroutine for the Raft state machine.
s.startGoRoutine(n.run, labels)
// Start the filewriter.
s.startGoRoutine(n.fileWriter)
return n, nil
@@ -887,8 +893,20 @@ func (n *raft) ResumeApply() {
n.debug("Resuming %d replays", n.hcommit+1-n.commit)
for index := n.commit + 1; index <= n.hcommit; index++ {
if err := n.applyCommit(index); err != nil {
n.warn("Got error on apply commit during replay: %v", err)
break
}
// We want to unlock here to allow the upper layers to call Applied() without blocking.
n.Unlock()
// Give hint to let other Go routines run.
// Might not be necessary but seems to make it more fine grained interleaving.
runtime.Gosched()
// Simply re-acquire
n.Lock()
// Need to check if we got closed or if we were paused again.
if n.State() == Closed || n.paused {
return
}
}
}
n.hcommit = 0
@@ -1034,7 +1052,7 @@ func (n *raft) InstallSnapshot(data []byte) error {
sn := fmt.Sprintf(snapFileT, snap.lastTerm, snap.lastIndex)
sfile := filepath.Join(snapDir, sn)
if err := os.WriteFile(sfile, n.encodeSnapshot(snap), 0640); err != nil {
if err := os.WriteFile(sfile, n.encodeSnapshot(snap), defaultFilePerms); err != nil {
n.Unlock()
// We could set write err here, but if this is a temporary situation, too many open files etc.
// we want to retry and snapshots are not fatal.
@@ -1659,7 +1677,7 @@ const (
// Lock should be held (due to use of random generator)
func (n *raft) newCatchupInbox() string {
var b [replySuffixLen]byte
rn := n.prand.Int63()
rn := fastrand.Uint64()
for i, l := 0, rn; i < len(b); i++ {
b[i] = digits[l%base]
l /= base
@@ -1669,7 +1687,7 @@ func (n *raft) newCatchupInbox() string {
func (n *raft) newInbox() string {
var b [replySuffixLen]byte
rn := n.prand.Int63()
rn := fastrand.Uint64()
for i, l := 0, rn; i < len(b); i++ {
b[i] = digits[l%base]
l /= base
@@ -1765,14 +1783,14 @@ func (n *raft) run() {
// at least a route, leaf or gateway connection to be established before
// starting the run loop.
for gw := s.gateway; ; {
s.mu.Lock()
ready := s.numRemotes()+len(s.leafs) > 0
if !ready && gw.enabled {
s.mu.RLock()
ready, gwEnabled := s.numRemotes()+len(s.leafs) > 0, gw.enabled
s.mu.RUnlock()
if !ready && gwEnabled {
gw.RLock()
ready = len(gw.out)+len(gw.in) > 0
gw.RUnlock()
}
s.mu.Unlock()
if !ready {
select {
case <-s.quitCh:
@@ -1785,6 +1803,13 @@ func (n *raft) run() {
}
}
// We may have paused adding entries to apply queue, resume here.
// No-op if not paused.
n.ResumeApply()
// Send nil entry to signal the upper layers we are done doing replay/restore.
n.apply.push(nil)
for s.isRunning() {
switch n.State() {
case Follower:
@@ -3323,7 +3348,7 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) {
// Here we can become a leader but need to wait for resume of the apply queue.
n.lxfer = true
}
} else {
} else if n.vote != noVote {
// Since we are here we are not the chosen one but we should clear any vote preference.
n.vote = noVote
n.writeTermVote()
@@ -3616,7 +3641,7 @@ func (vr *voteRequest) encode() []byte {
return buf[:voteRequestLen]
}
func (n *raft) decodeVoteRequest(msg []byte, reply string) *voteRequest {
func decodeVoteRequest(msg []byte, reply string) *voteRequest {
if len(msg) != voteRequestLen {
return nil
}
@@ -3653,7 +3678,7 @@ func writePeerState(sd string, ps *peerState) error {
if _, err := os.Stat(psf); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.WriteFile(psf, encodePeerState(ps), 0640); err != nil {
if err := os.WriteFile(psf, encodePeerState(ps), defaultFilePerms); err != nil {
return err
}
return nil
@@ -3753,7 +3778,7 @@ func (n *raft) fileWriter() {
copy(buf[0:], n.wtv)
n.RUnlock()
<-dios
err := os.WriteFile(tvf, buf[:], 0640)
err := os.WriteFile(tvf, buf[:], defaultFilePerms)
dios <- struct{}{}
if err != nil && !n.isClosed() {
n.setWriteErr(err)
@@ -3765,7 +3790,7 @@ func (n *raft) fileWriter() {
buf := copyBytes(n.wps)
n.RUnlock()
<-dios
err := os.WriteFile(psf, buf, 0640)
err := os.WriteFile(psf, buf, defaultFilePerms)
dios <- struct{}{}
if err != nil && !n.isClosed() {
n.setWriteErr(err)
@@ -3818,7 +3843,7 @@ func (vr *voteResponse) encode() []byte {
return buf[:voteResponseLen]
}
func (n *raft) decodeVoteResponse(msg []byte) *voteResponse {
func decodeVoteResponse(msg []byte) *voteResponse {
if len(msg) != voteResponseLen {
return nil
}
@@ -3829,7 +3854,7 @@ func (n *raft) decodeVoteResponse(msg []byte) *voteResponse {
}
func (n *raft) handleVoteResponse(sub *subscription, c *client, _ *Account, _, reply string, msg []byte) {
vr := n.decodeVoteResponse(msg)
vr := decodeVoteResponse(msg)
n.debug("Received a voteResponse %+v", vr)
if vr == nil {
n.error("Received malformed vote response for %q", n.group)
@@ -3903,7 +3928,7 @@ func (n *raft) processVoteRequest(vr *voteRequest) error {
}
func (n *raft) handleVoteRequest(sub *subscription, c *client, _ *Account, subject, reply string, msg []byte) {
vr := n.decodeVoteRequest(msg, reply)
vr := decodeVoteRequest(msg, reply)
if vr == nil {
n.error("Received malformed vote request for %q", n.group)
return

View File

@@ -834,7 +834,7 @@ type profBlockRateReload struct {
func (o *profBlockRateReload) Apply(s *Server) {
s.setBlockProfileRate(o.newValue)
s.Noticef("Reloaded: block_prof_rate = %v", o.newValue)
s.Noticef("Reloaded: prof_block_rate = %v", o.newValue)
}
type leafNodeOption struct {
@@ -1703,7 +1703,6 @@ func (s *Server) applyOptions(ctx *reloadContext, opts []option) {
reloadClientTrcLvl = false
reloadJetstream = false
jsEnabled = false
reloadTLS = false
isStatszChange = false
co *clusterOption
)
@@ -1718,9 +1717,6 @@ func (s *Server) applyOptions(ctx *reloadContext, opts []option) {
if opt.IsAuthChange() {
reloadAuth = true
}
if opt.IsTLSChange() {
reloadTLS = true
}
if opt.IsClusterPoolSizeOrAccountsChange() {
co = opt.(*clusterOption)
}
@@ -1778,13 +1774,9 @@ func (s *Server) applyOptions(ctx *reloadContext, opts []option) {
s.updateRemoteLeafNodesTLSConfig(newOpts)
}
// This will fire if TLS enabled at root (NATS listener) -or- if ocsp or ocsp_cache
// appear in the config.
if reloadTLS {
// Restart OCSP monitoring.
if err := s.reloadOCSP(); err != nil {
s.Warnf("Can't restart OCSP features: %v", err)
}
// Always restart OCSP monitoring on reload.
if err := s.reloadOCSP(); err != nil {
s.Warnf("Can't restart OCSP features: %v", err)
}
s.Noticef("Reloaded server configuration")

View File

@@ -1,4 +1,4 @@
// Copyright 2012-2022 The NATS Authors
// Copyright 2012-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -1117,9 +1117,14 @@ func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error)
create = false
}
}
// Track old mappings if global account.
var oldGMappings []*mapping
if create {
if acc.Name == globalAccountName {
a = s.gacc
a.mu.Lock()
oldGMappings = append(oldGMappings, a.mappings...)
a.mu.Unlock()
} else {
a = NewAccount(acc.Name)
}
@@ -1130,9 +1135,34 @@ func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error)
// Will be a no-op in case of the global account since it is already registered.
s.registerAccountNoLock(a)
}
// The `acc` account is stored in options, not in the server, and these can be cleared.
acc.sl, acc.clients, acc.mappings = nil, nil, nil
// Check here if we have been reloaded and we have a global account with mappings that may have changed.
// If we have leafnodes they need to be updated.
if reloading && a == s.gacc {
a.mu.Lock()
var mappings []*mapping
if len(a.mappings) > 0 && a.nleafs > 0 {
mappings = append(mappings, a.mappings...)
}
a.mu.Unlock()
if len(mappings) > 0 || len(oldGMappings) > 0 {
a.lmu.RLock()
for _, lc := range a.lleafs {
for _, em := range mappings {
lc.forceAddToSmap(em.src)
}
// Remove any old ones if needed.
for _, em := range oldGMappings {
lc.forceRemoveFromSmap(em.src)
}
}
a.lmu.RUnlock()
}
}
// If we see an account defined using $SYS we will make sure that is set as system account.
if acc.Name == DEFAULT_SYSTEM_ACCOUNT && opts.SystemAccount == _EMPTY_ {
opts.SystemAccount = DEFAULT_SYSTEM_ACCOUNT

View File

@@ -1,4 +1,4 @@
// Copyright 2019-2023 The NATS Authors
// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -15,7 +15,6 @@ package server
import (
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
@@ -86,6 +85,7 @@ type StreamStore interface {
StoreMsg(subject string, hdr, msg []byte) (uint64, int64, error)
StoreRawMsg(subject string, hdr, msg []byte, seq uint64, ts int64) error
SkipMsg() uint64
SkipMsgs(seq uint64, num uint64) error
LoadMsg(seq uint64, sm *StoreMsg) (*StoreMsg, error)
LoadNextMsg(filter string, wc bool, start uint64, smp *StoreMsg) (sm *StoreMsg, skip uint64, err error)
LoadLastMsg(subject string, sm *StoreMsg) (*StoreMsg, error)
@@ -442,14 +442,16 @@ type TemplateStore interface {
Delete(*streamTemplate) error
}
// jsonString wraps s in double quotes to produce a JSON string literal.
// NOTE: no escaping is performed, so s must not contain quote or control
// characters; callers only pass fixed policy keywords.
func jsonString(s string) string {
	const quote = `"`
	return quote + s + quote
}
const (
limitsPolicyString = "limits"
interestPolicyString = "interest"
workQueuePolicyString = "workqueue"
limitsPolicyJSONString = `"limits"`
interestPolicyJSONString = `"interest"`
workQueuePolicyJSONString = `"workqueue"`
)
var (
limitsPolicyJSONBytes = []byte(limitsPolicyJSONString)
interestPolicyJSONBytes = []byte(interestPolicyJSONString)
workQueuePolicyJSONBytes = []byte(workQueuePolicyJSONString)
)
func (rp RetentionPolicy) String() string {
@@ -468,11 +470,11 @@ func (rp RetentionPolicy) String() string {
func (rp RetentionPolicy) MarshalJSON() ([]byte, error) {
switch rp {
case LimitsPolicy:
return json.Marshal(limitsPolicyString)
return limitsPolicyJSONBytes, nil
case InterestPolicy:
return json.Marshal(interestPolicyString)
return interestPolicyJSONBytes, nil
case WorkQueuePolicy:
return json.Marshal(workQueuePolicyString)
return workQueuePolicyJSONBytes, nil
default:
return nil, fmt.Errorf("can not marshal %v", rp)
}
@@ -480,11 +482,11 @@ func (rp RetentionPolicy) MarshalJSON() ([]byte, error) {
func (rp *RetentionPolicy) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString(limitsPolicyString):
case limitsPolicyJSONString:
*rp = LimitsPolicy
case jsonString(interestPolicyString):
case interestPolicyJSONString:
*rp = InterestPolicy
case jsonString(workQueuePolicyString):
case workQueuePolicyJSONString:
*rp = WorkQueuePolicy
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -506,9 +508,9 @@ func (dp DiscardPolicy) String() string {
func (dp DiscardPolicy) MarshalJSON() ([]byte, error) {
switch dp {
case DiscardOld:
return json.Marshal("old")
return []byte(`"old"`), nil
case DiscardNew:
return json.Marshal("new")
return []byte(`"new"`), nil
default:
return nil, fmt.Errorf("can not marshal %v", dp)
}
@@ -516,9 +518,9 @@ func (dp DiscardPolicy) MarshalJSON() ([]byte, error) {
func (dp *DiscardPolicy) UnmarshalJSON(data []byte) error {
switch strings.ToLower(string(data)) {
case jsonString("old"):
case `"old"`:
*dp = DiscardOld
case jsonString("new"):
case `"new"`:
*dp = DiscardNew
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -527,9 +529,15 @@ func (dp *DiscardPolicy) UnmarshalJSON(data []byte) error {
}
const (
memoryStorageString = "memory"
fileStorageString = "file"
anyStorageString = "any"
memoryStorageJSONString = `"memory"`
fileStorageJSONString = `"file"`
anyStorageJSONString = `"any"`
)
var (
memoryStorageJSONBytes = []byte(memoryStorageJSONString)
fileStorageJSONBytes = []byte(fileStorageJSONString)
anyStorageJSONBytes = []byte(anyStorageJSONString)
)
func (st StorageType) String() string {
@@ -548,11 +556,11 @@ func (st StorageType) String() string {
func (st StorageType) MarshalJSON() ([]byte, error) {
switch st {
case MemoryStorage:
return json.Marshal(memoryStorageString)
return memoryStorageJSONBytes, nil
case FileStorage:
return json.Marshal(fileStorageString)
return fileStorageJSONBytes, nil
case AnyStorage:
return json.Marshal(anyStorageString)
return anyStorageJSONBytes, nil
default:
return nil, fmt.Errorf("can not marshal %v", st)
}
@@ -560,11 +568,11 @@ func (st StorageType) MarshalJSON() ([]byte, error) {
func (st *StorageType) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString(memoryStorageString):
case memoryStorageJSONString:
*st = MemoryStorage
case jsonString(fileStorageString):
case fileStorageJSONString:
*st = FileStorage
case jsonString(anyStorageString):
case anyStorageJSONString:
*st = AnyStorage
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -573,19 +581,25 @@ func (st *StorageType) UnmarshalJSON(data []byte) error {
}
const (
ackNonePolicyString = "none"
ackAllPolicyString = "all"
ackExplicitPolicyString = "explicit"
ackNonePolicyJSONString = `"none"`
ackAllPolicyJSONString = `"all"`
ackExplicitPolicyJSONString = `"explicit"`
)
var (
ackNonePolicyJSONBytes = []byte(ackNonePolicyJSONString)
ackAllPolicyJSONBytes = []byte(ackAllPolicyJSONString)
ackExplicitPolicyJSONBytes = []byte(ackExplicitPolicyJSONString)
)
func (ap AckPolicy) MarshalJSON() ([]byte, error) {
switch ap {
case AckNone:
return json.Marshal(ackNonePolicyString)
return ackNonePolicyJSONBytes, nil
case AckAll:
return json.Marshal(ackAllPolicyString)
return ackAllPolicyJSONBytes, nil
case AckExplicit:
return json.Marshal(ackExplicitPolicyString)
return ackExplicitPolicyJSONBytes, nil
default:
return nil, fmt.Errorf("can not marshal %v", ap)
}
@@ -593,11 +607,11 @@ func (ap AckPolicy) MarshalJSON() ([]byte, error) {
func (ap *AckPolicy) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString(ackNonePolicyString):
case ackNonePolicyJSONString:
*ap = AckNone
case jsonString(ackAllPolicyString):
case ackAllPolicyJSONString:
*ap = AckAll
case jsonString(ackExplicitPolicyString):
case ackExplicitPolicyJSONString:
*ap = AckExplicit
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -606,16 +620,21 @@ func (ap *AckPolicy) UnmarshalJSON(data []byte) error {
}
const (
replayInstantPolicyString = "instant"
replayOriginalPolicyString = "original"
replayInstantPolicyJSONString = `"instant"`
replayOriginalPolicyJSONString = `"original"`
)
var (
replayInstantPolicyJSONBytes = []byte(replayInstantPolicyJSONString)
replayOriginalPolicyJSONBytes = []byte(replayOriginalPolicyJSONString)
)
func (rp ReplayPolicy) MarshalJSON() ([]byte, error) {
switch rp {
case ReplayInstant:
return json.Marshal(replayInstantPolicyString)
return replayInstantPolicyJSONBytes, nil
case ReplayOriginal:
return json.Marshal(replayOriginalPolicyString)
return replayOriginalPolicyJSONBytes, nil
default:
return nil, fmt.Errorf("can not marshal %v", rp)
}
@@ -623,9 +642,9 @@ func (rp ReplayPolicy) MarshalJSON() ([]byte, error) {
func (rp *ReplayPolicy) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString(replayInstantPolicyString):
case replayInstantPolicyJSONString:
*rp = ReplayInstant
case jsonString(replayOriginalPolicyString):
case replayOriginalPolicyJSONString:
*rp = ReplayOriginal
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -634,28 +653,38 @@ func (rp *ReplayPolicy) UnmarshalJSON(data []byte) error {
}
const (
deliverAllPolicyString = "all"
deliverLastPolicyString = "last"
deliverNewPolicyString = "new"
deliverByStartSequenceString = "by_start_sequence"
deliverByStartTimeString = "by_start_time"
deliverLastPerPolicyString = "last_per_subject"
deliverUndefinedString = "undefined"
deliverAllPolicyJSONString = `"all"`
deliverLastPolicyJSONString = `"last"`
deliverNewPolicyJSONString = `"new"`
deliverByStartSequenceJSONString = `"by_start_sequence"`
deliverByStartTimeJSONString = `"by_start_time"`
deliverLastPerPolicyJSONString = `"last_per_subject"`
deliverUndefinedJSONString = `"undefined"`
)
var (
deliverAllPolicyJSONBytes = []byte(deliverAllPolicyJSONString)
deliverLastPolicyJSONBytes = []byte(deliverLastPolicyJSONString)
deliverNewPolicyJSONBytes = []byte(deliverNewPolicyJSONString)
deliverByStartSequenceJSONBytes = []byte(deliverByStartSequenceJSONString)
deliverByStartTimeJSONBytes = []byte(deliverByStartTimeJSONString)
deliverLastPerPolicyJSONBytes = []byte(deliverLastPerPolicyJSONString)
deliverUndefinedJSONBytes = []byte(deliverUndefinedJSONString)
)
func (p *DeliverPolicy) UnmarshalJSON(data []byte) error {
switch string(data) {
case jsonString(deliverAllPolicyString), jsonString(deliverUndefinedString):
case deliverAllPolicyJSONString, deliverUndefinedJSONString:
*p = DeliverAll
case jsonString(deliverLastPolicyString):
case deliverLastPolicyJSONString:
*p = DeliverLast
case jsonString(deliverLastPerPolicyString):
case deliverLastPerPolicyJSONString:
*p = DeliverLastPerSubject
case jsonString(deliverNewPolicyString):
case deliverNewPolicyJSONString:
*p = DeliverNew
case jsonString(deliverByStartSequenceString):
case deliverByStartSequenceJSONString:
*p = DeliverByStartSequence
case jsonString(deliverByStartTimeString):
case deliverByStartTimeJSONString:
*p = DeliverByStartTime
default:
return fmt.Errorf("can not unmarshal %q", data)
@@ -667,19 +696,19 @@ func (p *DeliverPolicy) UnmarshalJSON(data []byte) error {
func (p DeliverPolicy) MarshalJSON() ([]byte, error) {
switch p {
case DeliverAll:
return json.Marshal(deliverAllPolicyString)
return deliverAllPolicyJSONBytes, nil
case DeliverLast:
return json.Marshal(deliverLastPolicyString)
return deliverLastPolicyJSONBytes, nil
case DeliverLastPerSubject:
return json.Marshal(deliverLastPerPolicyString)
return deliverLastPerPolicyJSONBytes, nil
case DeliverNew:
return json.Marshal(deliverNewPolicyString)
return deliverNewPolicyJSONBytes, nil
case DeliverByStartSequence:
return json.Marshal(deliverByStartSequenceString)
return deliverByStartSequenceJSONBytes, nil
case DeliverByStartTime:
return json.Marshal(deliverByStartTimeString)
return deliverByStartTimeJSONBytes, nil
default:
return json.Marshal(deliverUndefinedString)
return deliverUndefinedJSONBytes, nil
}
}

View File

@@ -1,4 +1,4 @@
// Copyright 2019-2023 The NATS Authors
// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -29,6 +29,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/klauspost/compress/s2"
@@ -219,7 +220,7 @@ type stream struct {
srv *Server
client *client
sysc *client
sid int
sid atomic.Uint64
pubAck []byte
outq *jsOutQ
msgs *ipQueue[*inMsg]
@@ -242,7 +243,7 @@ type stream struct {
mqch chan struct{}
active bool
ddloaded bool
closed bool
closed atomic.Bool
// Mirror
mirror *sourceInfo
@@ -276,7 +277,7 @@ type stream struct {
// Clustered mode.
sa *streamAssignment
node RaftNode
catchup bool
catchup atomic.Bool
syncSub *subscription
infoSub *subscription
clMu sync.Mutex
@@ -309,6 +310,7 @@ type sourceInfo struct {
start time.Time
lag uint64
err *ApiError
fails int
last time.Time
lreq time.Time
qch chan struct{}
@@ -368,9 +370,7 @@ type ddentry struct {
}
// Replicas Range
const (
StreamMaxReplicas = 5
)
const StreamMaxReplicas = 5
// AddStream adds a stream for the given account.
func (a *Account) addStream(config *StreamConfig) (*stream, error) {
@@ -1254,6 +1254,7 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfi
}
return exists, cfg
}
hasStream := func(streamName string) (bool, int32, []string) {
exists, cfg := getStream(streamName)
return exists, cfg.MaxMsgSize, cfg.Subjects
@@ -1632,13 +1633,7 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*Str
// Save the user configured MaxBytes.
newMaxBytes := cfg.MaxBytes
maxBytesOffset := int64(0)
if old.MaxBytes > 0 {
if excessRep := cfg.Replicas - old.Replicas; excessRep > 0 {
maxBytesOffset = old.MaxBytes * int64(excessRep)
}
}
// We temporarily set cfg.MaxBytes to maxBytesDiff because checkAllLimits
// adds cfg.MaxBytes to the current reserved limit and checks if we've gone
@@ -1670,7 +1665,11 @@ func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*Str
_, reserved = tieredStreamAndReservationCount(js.cluster.streams[acc.Name], tier, &cfg)
}
// reservation does not account for this stream, hence add the old value
reserved += int64(old.Replicas) * old.MaxBytes
if tier == _EMPTY_ && old.Replicas > 1 {
reserved += old.MaxBytes * int64(old.Replicas)
} else {
reserved += old.MaxBytes
}
if err := js.checkAllLimits(&selected, &cfg, reserved, maxBytesOffset); err != nil {
return nil, err
}
@@ -1774,6 +1773,8 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool)
// Check for Sources.
if len(cfg.Sources) > 0 || len(ocfg.Sources) > 0 {
currentIName := make(map[string]struct{})
needsStartingSeqNum := make(map[string]struct{})
for _, s := range ocfg.Sources {
currentIName[s.iname] = struct{}{}
}
@@ -1807,18 +1808,25 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool)
}
mset.sources[s.iname] = si
mset.setStartingSequenceForSource(s.iname, s.External)
mset.setSourceConsumer(s.iname, si.sseq+1, time.Time{})
needsStartingSeqNum[s.iname] = struct{}{}
} else {
// source already exists
delete(currentIName, s.iname)
}
}
// What is left in cuurentIName needs to be deleted.
// What is left in currentIName needs to be deleted.
for iName := range currentIName {
mset.cancelSourceConsumer(iName)
delete(mset.sources, iName)
}
neededCopy := make(map[string]struct{}, len(needsStartingSeqNum))
for iName := range needsStartingSeqNum {
neededCopy[iName] = struct{}{}
}
mset.setStartingSequenceForSources(needsStartingSeqNum)
for iName := range neededCopy {
mset.setSourceConsumer(iName, mset.sources[iName].sseq+1, time.Time{})
}
}
}
@@ -1957,9 +1965,9 @@ func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool)
// Purge will remove all messages from the stream and underlying store based on the request.
func (mset *stream) purge(preq *JSApiStreamPurgeRequest) (purged uint64, err error) {
mset.mu.RLock()
if mset.client == nil || mset.store == nil {
if mset.closed.Load() {
mset.mu.RUnlock()
return 0, errors.New("invalid stream")
return 0, errStreamClosed
}
if mset.cfg.Sealed {
mset.mu.RUnlock()
@@ -2001,11 +2009,10 @@ func (mset *stream) purge(preq *JSApiStreamPurgeRequest) (purged uint64, err err
// no subject was specified, we can purge all consumers sequences
doPurge := preq == nil ||
preq.Subject == _EMPTY_ ||
// or consumer filter subject is equal to purged subject
preq.Subject == o.cfg.FilterSubject ||
// or consumer subject is subset of purged subject,
// consumer filter subject is equal to purged subject
// or consumer filter subject is subset of purged subject,
// but not the other way around.
subjectIsSubsetMatch(o.cfg.FilterSubject, preq.Subject)
o.isEqualOrSubsetMatch(preq.Subject)
o.mu.RUnlock()
if doPurge {
o.purge(fseq, lseq)
@@ -2025,24 +2032,17 @@ func (mset *stream) removeMsg(seq uint64) (bool, error) {
// DeleteMsg will remove a message from a stream.
func (mset *stream) deleteMsg(seq uint64) (bool, error) {
mset.mu.RLock()
if mset.client == nil {
mset.mu.RUnlock()
return false, fmt.Errorf("invalid stream")
if mset.closed.Load() {
return false, errStreamClosed
}
mset.mu.RUnlock()
return mset.store.RemoveMsg(seq)
}
// EraseMsg will securely remove a message and rewrite the data with random data.
func (mset *stream) eraseMsg(seq uint64) (bool, error) {
mset.mu.RLock()
if mset.client == nil {
mset.mu.RUnlock()
return false, fmt.Errorf("invalid stream")
if mset.closed.Load() {
return false, errStreamClosed
}
mset.mu.RUnlock()
return mset.store.EraseMsg(seq)
}
@@ -2370,34 +2370,56 @@ func (mset *stream) retryMirrorConsumer() error {
// Lock should be held.
func (mset *stream) skipMsgs(start, end uint64) {
node, store := mset.node, mset.store
// If we are not clustered we can short circuit now with store.SkipMsgs
if node == nil {
store.SkipMsgs(start, end-start+1)
mset.lseq = end
return
}
// FIXME (dlc) - We should allow proposals of DeleteEange, but would need to make sure all peers support.
// With syncRequest was easy to add bool into request.
var entries []*Entry
for seq := start; seq <= end; seq++ {
if node != nil {
entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)})
// So a single message does not get too big.
if len(entries) > 10_000 {
node.ProposeDirect(entries)
// We need to re-craete `entries` because there is a reference
// to it in the node's pae map.
entries = entries[:0]
}
} else {
mset.lseq = store.SkipMsg()
entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)})
// So a single message does not get too big.
if len(entries) > 10_000 {
node.ProposeDirect(entries)
// We need to re-create `entries` because there is a reference
// to it in the node's pae map.
entries = entries[:0]
}
}
// Send all at once.
if node != nil && len(entries) > 0 {
if len(entries) > 0 {
node.ProposeDirect(entries)
}
}
const (
	// retryBackOff is the base backoff duration between retry attempts.
	retryBackOff = 5 * time.Second
	// retryMaximum caps the backoff so repeated failures never wait
	// longer than this.
	retryMaximum = 2 * time.Minute
)

// calculateRetryBackoff returns how long to wait before the next retry
// attempt. The wait grows linearly with the number of consecutive
// failures (twice the base duration per failure, so 10s, 20s, 30s, ...)
// and is capped at retryMaximum. Zero failures yield zero backoff.
func calculateRetryBackoff(fails int) time.Duration {
	// retryBackOff is already a time.Duration; only the failure count
	// needs converting (the original wrapped both in time.Duration).
	backoff := retryBackOff * time.Duration(fails*2)
	if backoff > retryMaximum {
		backoff = retryMaximum
	}
	return backoff
}
// This will schedule a call to setupMirrorConsumer, taking into account the last
// time it was retried and determine the soonest setSourceConsumer can be called
// without tripping the sourceConsumerRetryThreshold.
// time it was retried and determine the soonest setupMirrorConsumer can be called
// without tripping the sourceConsumerRetryThreshold. We will also take into account
// number of failures and will back off our retries.
// The mset.mirror pointer has been verified to be not nil by the caller.
//
// Lock held on entry
func (mset *stream) scheduleSetupMirrorConsumerRetryAsap() {
func (mset *stream) scheduleSetupMirrorConsumerRetry() {
// We are trying to figure out how soon we can retry. setupMirrorConsumer will reject
// a retry if last was done less than "sourceConsumerRetryThreshold" ago.
next := sourceConsumerRetryThreshold - time.Since(mset.mirror.lreq)
@@ -2405,9 +2427,12 @@ func (mset *stream) scheduleSetupMirrorConsumerRetryAsap() {
// It means that we have passed the threshold and so we are ready to go.
next = 0
}
// To make *sure* that the next request will not fail, add a bit of buffer
// and some randomness.
next += time.Duration(rand.Intn(int(10*time.Millisecond))) + 10*time.Millisecond
// Take into account failures here.
next += calculateRetryBackoff(mset.mirror.fails)
// Add some jitter.
next += time.Duration(rand.Intn(int(100*time.Millisecond))) + 100*time.Millisecond
time.AfterFunc(next, func() {
mset.mu.Lock()
mset.setupMirrorConsumer()
@@ -2418,6 +2443,9 @@ func (mset *stream) scheduleSetupMirrorConsumerRetryAsap() {
// Setup our mirror consumer.
// Lock should be held.
func (mset *stream) setupMirrorConsumer() error {
if mset.closed.Load() {
return errStreamClosed
}
if mset.outq == nil {
return errors.New("outq required")
}
@@ -2449,7 +2477,7 @@ func (mset *stream) setupMirrorConsumer() error {
// We want to throttle here in terms of how fast we request new consumers,
// or if the previous is still in progress.
if last := time.Since(mirror.lreq); last < sourceConsumerRetryThreshold || mirror.sip {
mset.scheduleSetupMirrorConsumerRetryAsap()
mset.scheduleSetupMirrorConsumerRetry()
return nil
}
mirror.lreq = time.Now()
@@ -2506,27 +2534,28 @@ func (mset *stream) setupMirrorConsumer() error {
mirror.sf = mset.cfg.Mirror.FilterSubject
}
sfs := make([]string, len(mset.cfg.Mirror.SubjectTransforms))
trs := make([]*subjectTransform, len(mset.cfg.Mirror.SubjectTransforms))
if lst := len(mset.cfg.Mirror.SubjectTransforms); lst > 0 {
sfs := make([]string, lst)
trs := make([]*subjectTransform, lst)
for i, tr := range mset.cfg.Mirror.SubjectTransforms {
// will not fail as already checked before that the transform will work
subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination)
if err != nil {
mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err)
for i, tr := range mset.cfg.Mirror.SubjectTransforms {
// will not fail as already checked before that the transform will work
subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination)
if err != nil {
mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err)
}
sfs[i] = tr.Source
trs[i] = subjectTransform
}
sfs[i] = tr.Source
trs[i] = subjectTransform
mirror.sfs = sfs
mirror.trs = trs
req.Config.FilterSubjects = sfs
}
mirror.sfs = sfs
mirror.trs = trs
req.Config.FilterSubjects = sfs
respCh := make(chan *JSApiConsumerCreateResponse, 1)
reply := infoReplySubject()
crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
mset.unsubscribeUnlocked(sub)
mset.unsubscribe(sub)
_, msg := c.msgParts(rmsg)
var ccr JSApiConsumerCreateResponse
@@ -2535,11 +2564,14 @@ func (mset *stream) setupMirrorConsumer() error {
mset.setMirrorErr(ApiErrors[JSInvalidJSONErr])
return
}
respCh <- &ccr
select {
case respCh <- &ccr:
default:
}
})
if err != nil {
mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err))
mset.scheduleSetupMirrorConsumerRetryAsap()
mset.scheduleSetupMirrorConsumerRetry()
return nil
}
@@ -2557,26 +2589,9 @@ func (mset *stream) setupMirrorConsumer() error {
subject = strings.ReplaceAll(subject, "..", ".")
}
// We need to create the subscription that will receive the messages prior
// to sending the consumer create request, because in some complex topologies
// with gateways and optimistic mode, it is possible that the consumer starts
// delivering messages as soon as the consumer request is received.
qname := fmt.Sprintf("[ACC:%s] stream mirror '%s' of '%s' msgs", mset.acc.Name, mset.cfg.Name, mset.cfg.Mirror.Name)
// Create a new queue each time
mirror.msgs = newIPQueue[*inMsg](mset.srv, qname)
msgs := mirror.msgs
sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy.
mset.queueInbound(msgs, subject, reply, hdr, msg)
})
if err != nil {
mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err))
mset.unsubscribeUnlocked(crSub)
mset.scheduleSetupMirrorConsumerRetryAsap()
return nil
}
// Reset
mirror.msgs = nil
mirror.err = nil
mirror.sub = sub
mirror.sip = true
// Send the consumer create request
@@ -2592,7 +2607,13 @@ func (mset *stream) setupMirrorConsumer() error {
mset.mirror.sip = false
// If we need to retry, schedule now
if retry {
mset.scheduleSetupMirrorConsumerRetryAsap()
mset.mirror.fails++
// Cancel here since we can not do anything with this consumer at this point.
mset.cancelSourceInfo(mset.mirror)
mset.scheduleSetupMirrorConsumerRetry()
} else {
// Clear on success.
mset.mirror.fails = 0
}
}
mset.mu.Unlock()
@@ -2618,7 +2639,26 @@ func (mset *stream) setupMirrorConsumer() error {
mirror.err = ccr.Error
// Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold
retry = true
mset.mu.Unlock()
return
} else {
// Setup actual subscription to process messages from our source.
qname := fmt.Sprintf("[ACC:%s] stream mirror '%s' of '%s' msgs", mset.acc.Name, mset.cfg.Name, mset.cfg.Mirror.Name)
// Create a new queue each time
mirror.msgs = newIPQueue[*inMsg](mset.srv, qname)
msgs := mirror.msgs
sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy.
mset.queueInbound(msgs, subject, reply, hdr, msg)
})
if err != nil {
mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err))
retry = true
mset.mu.Unlock()
return
}
// Save our sub.
mirror.sub = sub
// When an upstream stream expires messages or in general has messages that we want
// that are no longer available we need to adjust here.
@@ -2659,7 +2699,7 @@ func (mset *stream) setupMirrorConsumer() error {
mset.mu.Unlock()
ready.Wait()
case <-time.After(5 * time.Second):
mset.unsubscribeUnlocked(crSub)
mset.unsubscribe(crSub)
// We already waited 5 seconds, let's retry now.
retry = true
}
@@ -2687,7 +2727,10 @@ func (mset *stream) retrySourceConsumer(iName string) {
}
var ss = mset.streamSource(iName)
if ss != nil {
mset.setStartingSequenceForSource(iName, ss.External)
iNameMap := map[string]struct{}{
iName: {},
}
mset.setStartingSequenceForSources(iNameMap)
mset.retrySourceConsumerAtSeq(iName, si.sseq+1)
}
}
@@ -2734,8 +2777,10 @@ func (mset *stream) cancelSourceInfo(si *sourceInfo) {
close(si.qch)
si.qch = nil
}
si.msgs.drain()
si.msgs.unregister()
if si.msgs != nil {
si.msgs.drain()
si.msgs.unregister()
}
}
const sourceConsumerRetryThreshold = 2 * time.Second
@@ -2745,7 +2790,7 @@ const sourceConsumerRetryThreshold = 2 * time.Second
// without tripping the sourceConsumerRetryThreshold.
//
// Lock held on entry
func (mset *stream) scheduleSetSourceConsumerRetryAsap(si *sourceInfo, seq uint64, startTime time.Time) {
func (mset *stream) scheduleSetSourceConsumerRetry(si *sourceInfo, seq uint64, startTime time.Time) {
// We are trying to figure out how soon we can retry. setSourceConsumer will reject
// a retry if last was done less than "sourceConsumerRetryThreshold" ago.
next := sourceConsumerRetryThreshold - time.Since(si.lreq)
@@ -2753,16 +2798,19 @@ func (mset *stream) scheduleSetSourceConsumerRetryAsap(si *sourceInfo, seq uint6
// It means that we have passed the threshold and so we are ready to go.
next = 0
}
// Take into account failures here.
next += calculateRetryBackoff(si.fails)
// To make *sure* that the next request will not fail, add a bit of buffer
// and some randomness.
next += time.Duration(rand.Intn(int(10*time.Millisecond))) + 10*time.Millisecond
mset.scheduleSetSourceConsumerRetry(si.iname, seq, next, startTime)
mset.scheduleSetSourceConsumer(si.iname, seq, next, startTime)
}
// Simply schedules setSourceConsumer at the given delay.
//
// Lock held on entry
func (mset *stream) scheduleSetSourceConsumerRetry(iname string, seq uint64, delay time.Duration, startTime time.Time) {
func (mset *stream) scheduleSetSourceConsumer(iname string, seq uint64, delay time.Duration, startTime time.Time) {
if mset.sourceRetries == nil {
mset.sourceRetries = map[string]*time.Timer{}
}
@@ -2784,6 +2832,11 @@ func (mset *stream) scheduleSetSourceConsumerRetry(iname string, seq uint64, del
// Lock should be held.
func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.Time) {
// Ignore if closed.
if mset.closed.Load() {
return
}
si := mset.sources[iname]
if si == nil {
return
@@ -2799,7 +2852,7 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
// We want to throttle here in terms of how fast we request new consumers,
// or if the previous is still in progress.
if last := time.Since(si.lreq); last < sourceConsumerRetryThreshold || si.sip {
mset.scheduleSetSourceConsumerRetryAsap(si, seq, startTime)
mset.scheduleSetSourceConsumerRetry(si, seq, startTime)
return
}
si.lreq = time.Now()
@@ -2867,18 +2920,21 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
respCh := make(chan *JSApiConsumerCreateResponse, 1)
reply := infoReplySubject()
crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
mset.unsubscribeUnlocked(sub)
mset.unsubscribe(sub)
_, msg := c.msgParts(rmsg)
var ccr JSApiConsumerCreateResponse
if err := json.Unmarshal(msg, &ccr); err != nil {
c.Warnf("JetStream bad source consumer create response: %q", msg)
return
}
respCh <- &ccr
select {
case respCh <- &ccr:
default:
}
})
if err != nil {
si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err))
mset.scheduleSetSourceConsumerRetryAsap(si, seq, startTime)
mset.scheduleSetSourceConsumerRetry(si, seq, startTime)
return
}
@@ -2904,26 +2960,9 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
// Marshal request.
b, _ := json.Marshal(req)
// We need to create the subscription that will receive the messages prior
// to sending the consumer create request, because in some complex topologies
// with gateways and optimistic mode, it is possible that the consumer starts
// delivering messages as soon as the consumer request is received.
qname := fmt.Sprintf("[ACC:%s] stream source '%s' from '%s' msgs", mset.acc.Name, mset.cfg.Name, si.name)
// Create a new queue each time
si.msgs = newIPQueue[*inMsg](mset.srv, qname)
msgs := si.msgs
sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy.
mset.queueInbound(msgs, subject, reply, hdr, msg)
})
if err != nil {
si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err))
mset.unsubscribeUnlocked(crSub)
mset.scheduleSetSourceConsumerRetryAsap(si, seq, startTime)
return
}
// Reset
si.msgs = nil
si.err = nil
si.sub = sub
si.sip = true
// Send the consumer create request
@@ -2939,7 +2978,13 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
si.sip = false
// If we need to retry, schedule now
if retry {
mset.scheduleSetSourceConsumerRetryAsap(si, seq, startTime)
si.fails++
// Cancel here since we can not do anything with this consumer at this point.
mset.cancelSourceInfo(si)
mset.scheduleSetSourceConsumerRetry(si, seq, startTime)
} else {
// Clear on success.
si.fails = 0
}
}
mset.mu.Unlock()
@@ -2954,7 +2999,7 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
ready := sync.WaitGroup{}
mset.mu.Lock()
// Check that it has not been removed or canceled (si.sub would be nil)
if si := mset.sources[iname]; si != nil && si.sub != nil {
if si := mset.sources[iname]; si != nil {
si.err = nil
if ccr.Error != nil || ccr.ConsumerInfo == nil {
// Note: this warning can happen a few times when starting up the server when sourcing streams are
@@ -2964,7 +3009,27 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
si.err = ccr.Error
// Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold
retry = true
mset.mu.Unlock()
return
} else {
// Setup actual subscription to process messages from our source.
qname := fmt.Sprintf("[ACC:%s] stream source '%s' from '%s' msgs", mset.acc.Name, mset.cfg.Name, si.name)
// Create a new queue each time
si.msgs = newIPQueue[*inMsg](mset.srv, qname)
msgs := si.msgs
sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy.
mset.queueInbound(msgs, subject, reply, hdr, msg)
})
if err != nil {
si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err))
retry = true
mset.mu.Unlock()
return
}
// Save our sub.
si.sub = sub
if si.sseq != ccr.ConsumerInfo.Delivered.Stream {
si.sseq = ccr.ConsumerInfo.Delivered.Stream + 1
}
@@ -2991,7 +3056,7 @@ func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.T
mset.mu.Unlock()
ready.Wait()
case <-time.After(5 * time.Second):
mset.unsubscribeUnlocked(crSub)
mset.unsubscribe(crSub)
// We already waited 5 seconds, let's retry now.
retry = true
}
@@ -3281,18 +3346,20 @@ func streamAndSeq(shdr string) (string, string, uint64) {
}
// Lock should be held.
func (mset *stream) setStartingSequenceForSource(iName string, external *ExternalStream) {
si := mset.sources[iName]
if si == nil {
return
}
func (mset *stream) setStartingSequenceForSources(iNames map[string]struct{}) {
var state StreamState
mset.store.FastState(&state)
// Do not reset sseq here so we can remember when purge/expiration happens.
if state.Msgs == 0 {
si.dseq = 0
for iName := range iNames {
si := mset.sources[iName]
if si == nil {
continue
} else {
si.dseq = 0
}
}
return
}
@@ -3307,10 +3374,26 @@ func (mset *stream) setStartingSequenceForSource(iName string, external *Externa
continue
}
streamName, indexName, sseq := streamAndSeq(string(ss))
if indexName == si.iname || (indexName == _EMPTY_ && (streamName == si.name || (external != nil && streamName == si.name+":"+getHash(external.ApiPrefix)))) {
if _, ok := iNames[indexName]; ok {
si := mset.sources[indexName]
si.sseq = sseq
si.dseq = 0
return
delete(iNames, indexName)
} else if indexName == _EMPTY_ && streamName != _EMPTY_ {
for iName := range iNames {
// TODO streamSource is a linear walk, to optimize later
if si := mset.sources[iName]; si != nil && streamName == si.name ||
(mset.streamSource(iName).External != nil && streamName == si.name+":"+getHash(mset.streamSource(iName).External.ApiPrefix)) {
si.sseq = sseq
si.dseq = 0
delete(iNames, iName)
break
}
}
}
if len(iNames) == 0 {
break
}
}
}
@@ -3489,7 +3572,7 @@ func (mset *stream) subscribeToStream() error {
mset.mirror.sfs = sfs
mset.mirror.trs = trs
// delay the actual mirror consumer creation for after a delay
mset.scheduleSetupMirrorConsumerRetryAsap()
mset.scheduleSetupMirrorConsumerRetry()
} else if len(mset.cfg.Sources) > 0 {
// Setup the initial source infos for the sources
mset.resetSourceInfo()
@@ -3620,55 +3703,43 @@ func (mset *stream) unsubscribeToStream(stopping bool) error {
return nil
}
// Lock should be held.
// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) subscribeInternal(subject string, cb msgHandler) (*subscription, error) {
c := mset.client
if c == nil {
return nil, fmt.Errorf("invalid stream")
if mset.closed.Load() {
return nil, errStreamClosed
}
if cb == nil {
return nil, fmt.Errorf("undefined message handler")
return nil, errInvalidMsgHandler
}
mset.sid++
c := mset.client
sid := int(mset.sid.Add(1))
// Now create the subscription
return c.processSub([]byte(subject), nil, []byte(strconv.Itoa(mset.sid)), cb, false)
return c.processSub([]byte(subject), nil, []byte(strconv.Itoa(sid)), cb, false)
}
// Helper for unlocked stream.
func (mset *stream) subscribeInternalUnlocked(subject string, cb msgHandler) (*subscription, error) {
mset.mu.Lock()
defer mset.mu.Unlock()
return mset.subscribeInternal(subject, cb)
}
// Lock should be held.
// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) queueSubscribeInternal(subject, group string, cb msgHandler) (*subscription, error) {
c := mset.client
if c == nil {
return nil, fmt.Errorf("invalid stream")
if mset.closed.Load() {
return nil, errStreamClosed
}
if cb == nil {
return nil, fmt.Errorf("undefined message handler")
return nil, errInvalidMsgHandler
}
mset.sid++
c := mset.client
sid := int(mset.sid.Add(1))
// Now create the subscription
return c.processSub([]byte(subject), []byte(group), []byte(strconv.Itoa(mset.sid)), cb, false)
return c.processSub([]byte(subject), []byte(group), []byte(strconv.Itoa(sid)), cb, false)
}
// This will unsubscribe us from the exact subject given.
// We do not currently track the subs so do not have the sid.
// This should be called only on an update.
// Lock should be held.
// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) unsubscribeInternal(subject string) error {
c := mset.client
if c == nil {
return fmt.Errorf("invalid stream")
if mset.closed.Load() {
return errStreamClosed
}
c := mset.client
var sid []byte
c.mu.Lock()
for _, sub := range c.subs {
@@ -3687,18 +3758,12 @@ func (mset *stream) unsubscribeInternal(subject string) error {
// Lock should be held.
func (mset *stream) unsubscribe(sub *subscription) {
if sub == nil || mset.client == nil {
if sub == nil || mset.closed.Load() {
return
}
mset.client.processUnsub(sub.sid)
}
func (mset *stream) unsubscribeUnlocked(sub *subscription) {
mset.mu.Lock()
mset.unsubscribe(sub)
mset.mu.Unlock()
}
func (mset *stream) setupStore(fsCfg *FileStoreConfig) error {
mset.mu.Lock()
mset.created = time.Now().UTC()
@@ -4078,20 +4143,21 @@ func (mset *stream) processInboundJetStreamMsg(_ *subscription, c *client, _ *Ac
}
var (
errLastSeqMismatch = errors.New("last sequence mismatch")
errMsgIdDuplicate = errors.New("msgid is duplicate")
errStreamClosed = errors.New("stream closed")
errLastSeqMismatch = errors.New("last sequence mismatch")
errMsgIdDuplicate = errors.New("msgid is duplicate")
errStreamClosed = errors.New("stream closed")
errInvalidMsgHandler = errors.New("undefined message handler")
)
// processJetStreamMsg is where we try to actually process the stream msg.
func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte, lseq uint64, ts int64) error {
mset.mu.Lock()
c, s, store := mset.client, mset.srv, mset.store
if mset.closed || c == nil {
mset.mu.Unlock()
if mset.closed.Load() {
return errStreamClosed
}
mset.mu.Lock()
s, store := mset.srv, mset.store
// Apply the input subject transform if any
if mset.itr != nil {
ts, err := mset.itr.Match(subject)
@@ -4453,7 +4519,7 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte,
return err
}
if exceeded, apiErr := jsa.limitsExceeded(stype, tierName); exceeded {
if exceeded, apiErr := jsa.limitsExceeded(stype, tierName, mset.cfg.Replicas); exceeded {
s.RateLimitWarnf("JetStream resource limits exceeded for account: %q", accName)
if canRespond {
resp.PubAck = &PubAck{Stream: name}
@@ -4911,8 +4977,9 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
jsa.mu.Unlock()
// Mark as closed, kick monitor and collect consumers first.
mset.closed.Store(true)
mset.mu.Lock()
mset.closed = true
// Signal to the monitor loop.
// Can't use qch here.
if mset.mqch != nil {
@@ -5016,7 +5083,6 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
// Snapshot store.
store := mset.store
c := mset.client
mset.client = nil
// Clustered cleanup.
mset.mu.Unlock()
@@ -5291,6 +5357,8 @@ func (mset *stream) Store() StreamStore {
// Lock should be held.
func (mset *stream) partitionUnique(name string, partitions []string) bool {
for _, partition := range partitions {
psa := [32]string{}
pts := tokenizeSubjectIntoSlice(psa[:0], partition)
for n, o := range mset.consumers {
// Skip the consumer being checked.
if n == name {
@@ -5300,8 +5368,8 @@ func (mset *stream) partitionUnique(name string, partitions []string) bool {
return false
}
for _, filter := range o.subjf {
if subjectIsSubsetMatch(partition, filter.subject) ||
subjectIsSubsetMatch(filter.subject, partition) {
if isSubsetMatchTokenized(pts, filter.tokenizedSubject) ||
isSubsetMatchTokenized(filter.tokenizedSubject, pts) {
return false
}
}
@@ -5459,7 +5527,7 @@ func (mset *stream) ackMsg(o *consumer, seq uint64) {
// Don't make this RLock(). We need to have only 1 running at a time to gauge interest across all consumers.
mset.mu.Lock()
if mset.closed || mset.store == nil || mset.cfg.Retention == LimitsPolicy {
if mset.closed.Load() || mset.cfg.Retention == LimitsPolicy {
mset.mu.Unlock()
return
}
@@ -5506,14 +5574,10 @@ func (mset *stream) ackMsg(o *consumer, seq uint64) {
// Snapshot creates a snapshot for the stream and possibly consumers.
func (mset *stream) snapshot(deadline time.Duration, checkMsgs, includeConsumers bool) (*SnapshotResult, error) {
mset.mu.RLock()
if mset.client == nil || mset.store == nil {
mset.mu.RUnlock()
return nil, errors.New("invalid stream")
if mset.closed.Load() {
return nil, errStreamClosed
}
store := mset.store
mset.mu.RUnlock()
return store.Snapshot(deadline, checkMsgs, includeConsumers)
}

View File

@@ -561,3 +561,29 @@ func (tr *subjectTransform) reverse() *subjectTransform {
rtr, _ := NewSubjectTransformStrict(nsrc, ndest)
return rtr
}
// Will share relevant info regarding the subject.
// Returns valid, tokens, num pwcs, has fwc.
func subjectInfo(subject string) (bool, []string, int, bool) {
if subject == "" {
return false, nil, 0, false
}
npwcs := 0
sfwc := false
tokens := strings.Split(subject, tsep)
for _, t := range tokens {
if len(t) == 0 || sfwc {
return false, nil, 0, false
}
if len(t) > 1 {
continue
}
switch t[0] {
case fwc:
sfwc = true
case pwc:
npwcs++
}
}
return true, tokens, npwcs, sfwc
}

View File

@@ -19,6 +19,7 @@ import (
"strings"
"sync"
"sync/atomic"
"unicode/utf8"
)
// Sublist is a routing mechanism to handle subject distribution and
@@ -1075,9 +1076,22 @@ func IsValidPublishSubject(subject string) bool {
// IsValidSubject returns true if a subject is valid, false otherwise
func IsValidSubject(subject string) bool {
return isValidSubject(subject, false)
}
func isValidSubject(subject string, checkRunes bool) bool {
if subject == _EMPTY_ {
return false
}
if checkRunes {
// Since casting to a string will always produce valid UTF-8, we need to look for replacement runes.
// This signals something is off or corrupt.
for _, r := range subject {
if r == utf8.RuneError {
return false
}
}
}
sfwc := false
tokens := strings.Split(subject, tsep)
for _, t := range tokens {
@@ -1101,32 +1115,6 @@ func IsValidSubject(subject string) bool {
return true
}
// Will share relevant info regarding the subject.
// Returns valid, tokens, num pwcs, has fwc.
func subjectInfo(subject string) (bool, []string, int, bool) {
if subject == "" {
return false, nil, 0, false
}
npwcs := 0
sfwc := false
tokens := strings.Split(subject, tsep)
for _, t := range tokens {
if len(t) == 0 || sfwc {
return false, nil, 0, false
}
if len(t) > 1 {
continue
}
switch t[0] {
case fwc:
sfwc = true
case pwc:
npwcs++
}
}
return true, tokens, npwcs, sfwc
}
// IsValidLiteralSubject returns true if a subject is valid and literal (no wildcards), false otherwise
func IsValidLiteralSubject(subject string) bool {
return isValidLiteralSubject(strings.Split(subject, tsep))

View File

@@ -2,9 +2,9 @@
[![License Apache 2](https://img.shields.io/badge/License-Apache2-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0)
[![Go Report Card](https://goreportcard.com/badge/github.com/nats-io/nkeys)](https://goreportcard.com/report/github.com/nats-io/nkeys)
[![Build Status](https://app.travis-ci.com/nats-io/nkeys.svg?branch=master)](https://app.travis-ci.com/nats-io/nkeys)
[![Build Status](https://github.com/nats-io/nkeys/actions/workflows/release.yaml/badge.svg)](https://github.com/nats-io/nkeys/actions/workflows/release.yaml/badge.svg)
[![GoDoc](https://godoc.org/github.com/nats-io/nkeys?status.svg)](https://godoc.org/github.com/nats-io/nkeys)
[![Coverage Status](https://coveralls.io/repos/github/nats-io/nkeys/badge.svg?branch=master&service=github)](https://coveralls.io/github/nats-io/nkeys?branch=master)
[![Coverage Status](https://coveralls.io/repos/github/nats-io/nkeys/badge.svg?branch=main&service=github)](https://coveralls.io/github/nats-io/nkeys?branch=main)
A public-key signature system based on [Ed25519](https://ed25519.cr.yp.to/) for the NATS ecosystem.
@@ -66,4 +66,3 @@ user2, _ := nkeys.FromRawSeed(PrefixByteUser, rawSeed)
Unless otherwise noted, the NATS source files are distributed
under the Apache Version 2.0 license found in the LICENSE file.

View File

@@ -19,7 +19,7 @@ package nkeys
import "io"
// Version is our current version
const Version = "0.4.6"
const Version = "0.4.7"
// KeyPair provides the central interface to nkeys.
type KeyPair interface {

7
vendor/modules.txt vendored
View File

@@ -1389,9 +1389,10 @@ github.com/mschoch/smat
# github.com/nats-io/jwt/v2 v2.5.3
## explicit; go 1.18
github.com/nats-io/jwt/v2
# github.com/nats-io/nats-server/v2 v2.10.7
# github.com/nats-io/nats-server/v2 v2.10.9
## explicit; go 1.20
github.com/nats-io/nats-server/v2/conf
github.com/nats-io/nats-server/v2/internal/fastrand
github.com/nats-io/nats-server/v2/internal/ldap
github.com/nats-io/nats-server/v2/logger
github.com/nats-io/nats-server/v2/server
@@ -1406,8 +1407,8 @@ github.com/nats-io/nats.go
github.com/nats-io/nats.go/encoders/builtin
github.com/nats-io/nats.go/internal/parser
github.com/nats-io/nats.go/util
# github.com/nats-io/nkeys v0.4.6
## explicit; go 1.19
# github.com/nats-io/nkeys v0.4.7
## explicit; go 1.20
github.com/nats-io/nkeys
# github.com/nats-io/nuid v1.0.1
## explicit