Merge pull request #104 from opencloud-eu/dependabot/go_modules/github.com/nats-io/nats-server/v2-2.10.24

Bump github.com/nats-io/nats-server/v2 from 2.10.22 to 2.10.24
This commit is contained in:
Jörn Friedrich Dreyer
2025-01-22 07:52:17 +01:00
committed by GitHub
48 changed files with 2286 additions and 970 deletions

6
go.mod
View File

@@ -57,7 +57,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0
github.com/mna/pigeon v1.3.0
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
github.com/nats-io/nats-server/v2 v2.10.22
github.com/nats-io/nats-server/v2 v2.10.24
github.com/nats-io/nats.go v1.37.0
github.com/oklog/run v1.1.0
github.com/olekukonko/tablewriter v0.0.5
@@ -268,8 +268,8 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nats-io/jwt/v2 v2.5.8 // indirect
github.com/nats-io/nkeys v0.4.7 // indirect
github.com/nats-io/jwt/v2 v2.7.3 // indirect
github.com/nats-io/nkeys v0.4.9 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/nxadm/tail v1.4.8 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect

12
go.sum
View File

@@ -831,14 +831,14 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/namedotcom/go v0.0.0-20180403034216-08470befbe04/go.mod h1:5sN+Lt1CaY4wsPvgQH/jsuJi4XO2ssZbdsIizr4CVC8=
github.com/nats-io/jwt/v2 v2.5.8 h1:uvdSzwWiEGWGXf+0Q+70qv6AQdvcvxrv9hPM0RiPamE=
github.com/nats-io/jwt/v2 v2.5.8/go.mod h1:ZdWS1nZa6WMZfFwwgpEaqBV8EPGVgOTDHN/wTbz0Y5A=
github.com/nats-io/nats-server/v2 v2.10.22 h1:Yt63BGu2c3DdMoBZNcR6pjGQwk/asrKU7VX846ibxDA=
github.com/nats-io/nats-server/v2 v2.10.22/go.mod h1:X/m1ye9NYansUXYFrbcDwUi/blHkrgHh2rgCJaakonk=
github.com/nats-io/jwt/v2 v2.7.3 h1:6bNPK+FXgBeAqdj4cYQ0F8ViHRbi7woQLq4W29nUAzE=
github.com/nats-io/jwt/v2 v2.7.3/go.mod h1:GvkcbHhKquj3pkioy5put1wvPxs78UlZ7D/pY+BgZk4=
github.com/nats-io/nats-server/v2 v2.10.24 h1:KcqqQAD0ZZcG4yLxtvSFJY7CYKVYlnlWoAiVZ6i/IY4=
github.com/nats-io/nats-server/v2 v2.10.24/go.mod h1:olvKt8E5ZlnjyqBGbAXtxvSQKsPodISK5Eo/euIta4s=
github.com/nats-io/nats.go v1.37.0 h1:07rauXbVnnJvv1gfIyghFEo6lUcYRY0WXc3x7x0vUxE=
github.com/nats-io/nats.go v1.37.0/go.mod h1:Ubdu4Nh9exXdSz0RVWRFBbRfrbSxOYd26oF0wkWclB8=
github.com/nats-io/nkeys v0.4.7 h1:RwNJbbIdYCoClSDNY7QVKZlyb/wfT6ugvFCiKy6vDvI=
github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDmGD0nc=
github.com/nats-io/nkeys v0.4.9 h1:qe9Faq2Gxwi6RZnZMXfmGMZkg3afLLOtrU+gDZJ35b0=
github.com/nats-io/nkeys v0.4.9/go.mod h1:jcMqs+FLG+W5YO36OX6wFIFcmpdAns+w1Wm6D3I/evE=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2023 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -133,7 +133,7 @@ func (o *OperatorLimits) Validate(vr *ValidationResults) {
}
}
// Mapping for publishes
// WeightedMapping for publishes
type WeightedMapping struct {
Subject Subject `json:"subject"`
Weight uint8 `json:"weight,omitempty"`
@@ -177,13 +177,13 @@ func (a *Account) AddMapping(sub Subject, to ...WeightedMapping) {
a.Mappings[sub] = to
}
// Enable external authorization for account users.
// ExternalAuthorization enables external authorization for account users.
// AuthUsers are those users specified to bypass the authorization callout and should be used for the authorization service itself.
// AllowedAccounts specifies which accounts, if any, that the authorization service can bind an authorized user to.
// The authorization response, a user JWT, will still need to be signed by the correct account.
// If optional XKey is specified, that is the public xkey (x25519) and the server will encrypt the request such that only the
// holder of the private key can decrypt. The auth service can also optionally encrypt the response back to the server using its
// publick xkey which will be in the authorization request.
// public xkey which will be in the authorization request.
type ExternalAuthorization struct {
AuthUsers StringList `json:"auth_users,omitempty"`
AllowedAccounts StringList `json:"allowed_accounts,omitempty"`
@@ -194,12 +194,12 @@ func (ac *ExternalAuthorization) IsEnabled() bool {
return len(ac.AuthUsers) > 0
}
// Helper function to determine if external authorization is enabled.
// HasExternalAuthorization helper function to determine if external authorization is enabled.
func (a *Account) HasExternalAuthorization() bool {
return a.Authorization.IsEnabled()
}
// Helper function to setup external authorization.
// EnableExternalAuthorization helper function to setup external authorization.
func (a *Account) EnableExternalAuthorization(users ...string) {
a.Authorization.AuthUsers.Add(users...)
}
@@ -230,6 +230,20 @@ func (ac *ExternalAuthorization) Validate(vr *ValidationResults) {
}
}
const (
ClusterTrafficSystem = "system"
ClusterTrafficOwner = "owner"
)
type ClusterTraffic string
func (ct ClusterTraffic) Valid() error {
if ct == "" || ct == ClusterTrafficSystem || ct == ClusterTrafficOwner {
return nil
}
return fmt.Errorf("unknown cluster traffic option: %q", ct)
}
// Account holds account specific claims data
type Account struct {
Imports Imports `json:"imports,omitempty"`
@@ -241,6 +255,7 @@ type Account struct {
Mappings Mapping `json:"mappings,omitempty"`
Authorization ExternalAuthorization `json:"authorization,omitempty"`
Trace *MsgTrace `json:"trace,omitempty"`
ClusterTraffic ClusterTraffic `json:"cluster_traffic,omitempty"`
Info
GenericFields
}
@@ -308,6 +323,10 @@ func (a *Account) Validate(acct *AccountClaims, vr *ValidationResults) {
}
a.SigningKeys.Validate(vr)
a.Info.Validate(vr)
if err := a.ClusterTraffic.Valid(); err != nil {
vr.AddError(err.Error())
}
}
// AccountClaims defines the body of an account JWT
@@ -338,13 +357,17 @@ func NewAccountClaims(subject string) *AccountClaims {
// Encode converts account claims into a JWT string
func (a *AccountClaims) Encode(pair nkeys.KeyPair) (string, error) {
return a.EncodeWithSigner(pair, nil)
}
func (a *AccountClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
if !nkeys.IsValidPublicAccountKey(a.Subject) {
return "", errors.New("expected subject to be account public key")
}
sort.Sort(a.Exports)
sort.Sort(a.Imports)
a.Type = AccountClaim
return a.ClaimsData.encode(pair, a)
return a.ClaimsData.encode(pair, a, fn)
}
// DecodeAccountClaims decodes account claims from a JWT string

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -72,11 +72,15 @@ func NewActivationClaims(subject string) *ActivationClaims {
// Encode turns an activation claim into a JWT string
func (a *ActivationClaims) Encode(pair nkeys.KeyPair) (string, error) {
return a.EncodeWithSigner(pair, nil)
}
func (a *ActivationClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
if !nkeys.IsValidPublicAccountKey(a.ClaimsData.Subject) {
return "", errors.New("expected subject to be an account")
}
a.Type = ActivationClaim
return a.ClaimsData.encode(pair, a)
return a.ClaimsData.encode(pair, a, fn)
}
// DecodeActivationClaims tries to create an activation claim from a JWT string

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2022 The NATS Authors
* Copyright 2022-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -113,8 +113,12 @@ func (ac *AuthorizationRequestClaims) Validate(vr *ValidationResults) {
// Encode tries to turn the auth request claims into a JWT string.
func (ac *AuthorizationRequestClaims) Encode(pair nkeys.KeyPair) (string, error) {
return ac.EncodeWithSigner(pair, nil)
}
func (ac *AuthorizationRequestClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
ac.Type = AuthorizationRequestClaim
return ac.ClaimsData.encode(pair, ac)
return ac.ClaimsData.encode(pair, ac, fn)
}
// DecodeAuthorizationRequestClaims tries to parse an auth request claims from a JWT string
@@ -242,6 +246,10 @@ func (ar *AuthorizationResponseClaims) Validate(vr *ValidationResults) {
// Encode tries to turn the auth request claims into a JWT string.
func (ar *AuthorizationResponseClaims) Encode(pair nkeys.KeyPair) (string, error) {
ar.Type = AuthorizationResponseClaim
return ar.ClaimsData.encode(pair, ar)
return ar.EncodeWithSigner(pair, nil)
}
func (ar *AuthorizationResponseClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
ar.Type = AuthorizationResponseClaim
return ar.ClaimsData.encode(pair, ar, fn)
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2022 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -68,10 +68,16 @@ func IsGenericClaimType(s string) bool {
}
}
// SignFn is used in an external sign environment. The function should be
// able to locate the private key for the specified pub key specified and sign the
// specified data returning the signature as generated.
type SignFn func(pub string, data []byte) ([]byte, error)
// Claims is a JWT claims
type Claims interface {
Claims() *ClaimsData
Encode(kp nkeys.KeyPair) (string, error)
EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error)
ExpectedPrefixes() []nkeys.PrefixByte
Payload() interface{}
String() string
@@ -121,7 +127,7 @@ func serialize(v interface{}) (string, error) {
return encodeToString(j), nil
}
func (c *ClaimsData) doEncode(header *Header, kp nkeys.KeyPair, claim Claims) (string, error) {
func (c *ClaimsData) doEncode(header *Header, kp nkeys.KeyPair, claim Claims, fn SignFn) (string, error) {
if header == nil {
return "", errors.New("header is required")
}
@@ -200,9 +206,21 @@ func (c *ClaimsData) doEncode(header *Header, kp nkeys.KeyPair, claim Claims) (s
if header.Algorithm == AlgorithmNkeyOld {
return "", errors.New(AlgorithmNkeyOld + " not supported to write jwtV2")
} else if header.Algorithm == AlgorithmNkey {
sig, err := kp.Sign([]byte(toSign))
if err != nil {
return "", err
var sig []byte
if fn != nil {
pk, err := kp.PublicKey()
if err != nil {
return "", err
}
sig, err = fn(pk, []byte(toSign))
if err != nil {
return "", err
}
} else {
sig, err = kp.Sign([]byte(toSign))
if err != nil {
return "", err
}
}
eSig = encodeToString(sig)
} else {
@@ -224,8 +242,8 @@ func (c *ClaimsData) hash() (string, error) {
// Encode encodes a claim into a JWT token. The claim is signed with the
// provided nkey's private key
func (c *ClaimsData) encode(kp nkeys.KeyPair, payload Claims) (string, error) {
return c.doEncode(&Header{TokenTypeJwt, AlgorithmNkey}, kp, payload)
func (c *ClaimsData) encode(kp nkeys.KeyPair, payload Claims, fn SignFn) (string, error) {
return c.doEncode(&Header{TokenTypeJwt, AlgorithmNkey}, kp, payload, fn)
}
// Returns a JSON representation of the claim

View File

@@ -273,7 +273,7 @@ func isContainedIn(kind ExportType, subjects []Subject, vr *ValidationResults) {
}
// Validate calls validate on all of the exports
func (e *Exports) Validate(vr *ValidationResults) error {
func (e *Exports) Validate(vr *ValidationResults) {
var serviceSubjects []Subject
var streamSubjects []Subject
@@ -292,8 +292,6 @@ func (e *Exports) Validate(vr *ValidationResults) error {
isContainedIn(Service, serviceSubjects, vr)
isContainedIn(Stream, streamSubjects, vr)
return nil
}
// HasExportContainingSubject checks if the export list has an export with the provided subject

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2020 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -107,7 +107,11 @@ func (gc *GenericClaims) Payload() interface{} {
// Encode takes a generic claims and creates a JWT string
func (gc *GenericClaims) Encode(pair nkeys.KeyPair) (string, error) {
return gc.ClaimsData.encode(pair, gc)
return gc.ClaimsData.encode(pair, gc, nil)
}
func (gc *GenericClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
return gc.ClaimsData.encode(pair, gc, fn)
}
// Validate checks the generic part of the claims data

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -191,6 +191,10 @@ func (oc *OperatorClaims) DidSign(op Claims) bool {
// Encode the claims into a JWT string
func (oc *OperatorClaims) Encode(pair nkeys.KeyPair) (string, error) {
return oc.EncodeWithSigner(pair, nil)
}
func (oc *OperatorClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
if !nkeys.IsValidPublicOperatorKey(oc.Subject) {
return "", errors.New("expected subject to be an operator public key")
}
@@ -199,7 +203,7 @@ func (oc *OperatorClaims) Encode(pair nkeys.KeyPair) (string, error) {
return "", err
}
oc.Type = OperatorClaim
return oc.ClaimsData.encode(pair, oc)
return oc.ClaimsData.encode(pair, oc, fn)
}
func (oc *OperatorClaims) ClaimType() ClaimType {

View File

@@ -309,7 +309,7 @@ func (l *Limits) Validate(vr *ValidationResults) {
}
}
if l.Times != nil && len(l.Times) > 0 {
if len(l.Times) > 0 {
for _, t := range l.Times {
t.Validate(vr)
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2018-2019 The NATS Authors
* Copyright 2018-2024 The NATS Authors
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -92,11 +92,15 @@ func (u *UserClaims) HasEmptyPermissions() bool {
// Encode tries to turn the user claims into a JWT string
func (u *UserClaims) Encode(pair nkeys.KeyPair) (string, error) {
return u.EncodeWithSigner(pair, nil)
}
func (u *UserClaims) EncodeWithSigner(pair nkeys.KeyPair, fn SignFn) (string, error) {
if !nkeys.IsValidPublicUserKey(u.Subject) {
return "", errors.New("expected subject to be user public key")
}
u.Type = UserClaim
return u.ClaimsData.encode(pair, u)
return u.ClaimsData.encode(pair, u, fn)
}
// DecodeUserClaims tries to parse a user claims from a JWT string

View File

@@ -46,11 +46,13 @@ type MatchByType int
const (
matchByIssuer MatchByType = iota + 1
matchBySubject
matchByThumbprint
)
var MatchByMap = map[string]MatchByType{
"issuer": matchByIssuer,
"subject": matchBySubject,
"issuer": matchByIssuer,
"subject": matchBySubject,
"thumbprint": matchByThumbprint,
}
var Usage = `

View File

@@ -1,4 +1,4 @@
// Copyright 2022-2023 The NATS Authors
// Copyright 2022-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,8 +26,7 @@ var _ = MATCHBYEMPTY
// otherKey implements crypto.Signer and crypto.Decrypter to satisfy linter on platforms that don't implement certstore
type otherKey struct{}
func TLSConfig(certStore StoreType, certMatchBy MatchByType, certMatch string, config *tls.Config) error {
_, _, _, _ = certStore, certMatchBy, certMatch, config
func TLSConfig(_ StoreType, _ MatchByType, _ string, _ []string, _ bool, _ *tls.Config) error {
return ErrOSNotCompatCertStore
}

View File

@@ -1,4 +1,4 @@
// Copyright 2022-2023 The NATS Authors
// Copyright 2022-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -41,26 +41,26 @@ import (
const (
// wincrypt.h constants
winAcquireCached = 0x1 // CRYPT_ACQUIRE_CACHE_FLAG
winAcquireSilent = 0x40 // CRYPT_ACQUIRE_SILENT_FLAG
winAcquireOnlyNCryptKey = 0x40000 // CRYPT_ACQUIRE_ONLY_NCRYPT_KEY_FLAG
winEncodingX509ASN = 1 // X509_ASN_ENCODING
winEncodingPKCS7 = 65536 // PKCS_7_ASN_ENCODING
winCertStoreProvSystem = 10 // CERT_STORE_PROV_SYSTEM
winCertStoreCurrentUser = uint32(winCertStoreCurrentUserID << winCompareShift) // CERT_SYSTEM_STORE_CURRENT_USER
winCertStoreLocalMachine = uint32(winCertStoreLocalMachineID << winCompareShift) // CERT_SYSTEM_STORE_LOCAL_MACHINE
winCertStoreCurrentUserID = 1 // CERT_SYSTEM_STORE_CURRENT_USER_ID
winCertStoreLocalMachineID = 2 // CERT_SYSTEM_STORE_LOCAL_MACHINE_ID
winInfoIssuerFlag = 4 // CERT_INFO_ISSUER_FLAG
winInfoSubjectFlag = 7 // CERT_INFO_SUBJECT_FLAG
winCompareNameStrW = 8 // CERT_COMPARE_NAME_STR_A
winCompareShift = 16 // CERT_COMPARE_SHIFT
winAcquireCached = windows.CRYPT_ACQUIRE_CACHE_FLAG
winAcquireSilent = windows.CRYPT_ACQUIRE_SILENT_FLAG
winAcquireOnlyNCryptKey = windows.CRYPT_ACQUIRE_ONLY_NCRYPT_KEY_FLAG
winEncodingX509ASN = windows.X509_ASN_ENCODING
winEncodingPKCS7 = windows.PKCS_7_ASN_ENCODING
winCertStoreProvSystem = windows.CERT_STORE_PROV_SYSTEM
winCertStoreCurrentUser = windows.CERT_SYSTEM_STORE_CURRENT_USER
winCertStoreLocalMachine = windows.CERT_SYSTEM_STORE_LOCAL_MACHINE
winCertStoreReadOnly = windows.CERT_STORE_READONLY_FLAG
winInfoIssuerFlag = windows.CERT_INFO_ISSUER_FLAG
winInfoSubjectFlag = windows.CERT_INFO_SUBJECT_FLAG
winCompareNameStrW = windows.CERT_COMPARE_NAME_STR_W
winCompareShift = windows.CERT_COMPARE_SHIFT
// Reference https://learn.microsoft.com/en-us/windows/win32/api/wincrypt/nf-wincrypt-certfindcertificateinstore
winFindIssuerStr = winCompareNameStrW<<winCompareShift | winInfoIssuerFlag // CERT_FIND_ISSUER_STR_W
winFindSubjectStr = winCompareNameStrW<<winCompareShift | winInfoSubjectFlag // CERT_FIND_SUBJECT_STR_W
winFindIssuerStr = windows.CERT_FIND_ISSUER_STR_W
winFindSubjectStr = windows.CERT_FIND_SUBJECT_STR_W
winFindHashStr = windows.CERT_FIND_HASH_STR
winNcryptKeySpec = 0xFFFFFFFF // CERT_NCRYPT_KEY_SPEC
winNcryptKeySpec = windows.CERT_NCRYPT_KEY_SPEC
winBCryptPadPKCS1 uintptr = 0x2
winBCryptPadPSS uintptr = 0x8 // Modern TLS 1.2+
@@ -76,7 +76,7 @@ const (
winECK3Magic = 0x334B4345 // "ECK3" BCRYPT_ECDH_PUBLIC_P384_MAGIC
winECK5Magic = 0x354B4345 // "ECK5" BCRYPT_ECDH_PUBLIC_P521_MAGIC
winCryptENotFound = 0x80092004 // CRYPT_E_NOT_FOUND
winCryptENotFound = windows.CRYPT_E_NOT_FOUND
providerMSSoftware = "Microsoft Software Key Storage Provider"
)
@@ -111,14 +111,24 @@ var (
crypto.SHA512: winWide("SHA512"), // BCRYPT_SHA512_ALGORITHM
}
// MY is well-known system store on Windows that holds personal certificates
winMyStore = winWide("MY")
// MY is well-known system store on Windows that holds personal certificates. Read
// More about the CA locations here:
// https://learn.microsoft.com/en-us/dotnet/framework/configure-apps/file-schema/wcf/certificate-of-clientcertificate-element?redirectedfrom=MSDN
// https://superuser.com/questions/217719/what-are-the-windows-system-certificate-stores
// https://docs.microsoft.com/en-us/windows/win32/seccrypto/certificate-stores
// https://learn.microsoft.com/en-us/windows/win32/seccrypto/system-store-locations
// https://stackoverflow.com/questions/63286085/which-x509-storename-refers-to-the-certificates-stored-beneath-trusted-root-cert#:~:text=4-,StoreName.,is%20%22Intermediate%20Certification%20Authorities%22.
winMyStore = winWide("MY")
winIntermediateCAStore = winWide("CA")
winRootStore = winWide("Root")
winAuthRootStore = winWide("AuthRoot")
// These DLLs must be available on all Windows hosts
winCrypt32 = windows.NewLazySystemDLL("crypt32.dll")
winNCrypt = windows.NewLazySystemDLL("ncrypt.dll")
winCertFindCertificateInStore = winCrypt32.NewProc("CertFindCertificateInStore")
winCertVerifyTimeValidity = winCrypt32.NewProc("CertVerifyTimeValidity")
winCryptAcquireCertificatePrivateKey = winCrypt32.NewProc("CryptAcquireCertificatePrivateKey")
winNCryptExportKey = winNCrypt.NewProc("NCryptExportKey")
winNCryptOpenStorageProvider = winNCrypt.NewProc("NCryptOpenStorageProvider")
@@ -156,9 +166,40 @@ type winPSSPaddingInfo struct {
cbSalt uint32
}
// TLSConfig fulfills the same function as reading cert and key pair from pem files but
// sources the Windows certificate store instead
func TLSConfig(certStore StoreType, certMatchBy MatchByType, certMatch string, config *tls.Config) error {
// createCACertsPool generates a CertPool from the Windows certificate store,
// adding all matching certificates from the caCertsMatch array to the pool.
// All matching certificates (vs first) are added to the pool based on a user
// request. If no certificates are found an error is returned.
func createCACertsPool(cs *winCertStore, storeType uint32, caCertsMatch []string, skipInvalid bool) (*x509.CertPool, error) {
var errs []error
caPool := x509.NewCertPool()
for _, s := range caCertsMatch {
lfs, err := cs.caCertsBySubjectMatch(s, storeType, skipInvalid)
if err != nil {
errs = append(errs, err)
} else {
for _, lf := range lfs {
caPool.AddCert(lf)
}
}
}
// If every lookup failed return the errors.
if len(errs) == len(caCertsMatch) {
return nil, fmt.Errorf("unable to match any CA certificate: %v", errs)
}
return caPool, nil
}
// TLSConfig fulfills the same function as reading cert and key pair from
// pem files but sources the Windows certificate store instead. The
// certMatchBy and certMatch fields search the "MY" certificate location
// for the first certificate that matches the certMatch field. The
// caCertsMatch field is used to search the Trusted Root, Third Party Root,
// and Intermediate Certificate Authority locations for certificates with
// Subjects matching the provided strings. If a match is found, the
// certificate is added to the pool that is used to verify the certificate
// chain.
func TLSConfig(certStore StoreType, certMatchBy MatchByType, certMatch string, caCertsMatch []string, skipInvalid bool, config *tls.Config) error {
var (
leaf *x509.Certificate
leafCtx *windows.CertContext
@@ -185,9 +226,11 @@ func TLSConfig(certStore StoreType, certMatchBy MatchByType, certMatch string, c
// certByIssuer or certBySubject
if certMatchBy == matchBySubject || certMatchBy == MATCHBYEMPTY {
leaf, leafCtx, err = cs.certBySubject(certMatch, scope)
leaf, leafCtx, err = cs.certBySubject(certMatch, scope, skipInvalid)
} else if certMatchBy == matchByIssuer {
leaf, leafCtx, err = cs.certByIssuer(certMatch, scope)
leaf, leafCtx, err = cs.certByIssuer(certMatch, scope, skipInvalid)
} else if certMatchBy == matchByThumbprint {
leaf, leafCtx, err = cs.certByThumbprint(certMatch, scope, skipInvalid)
} else {
return ErrBadMatchByType
}
@@ -205,6 +248,14 @@ func TLSConfig(certStore StoreType, certMatchBy MatchByType, certMatch string, c
if pk == nil {
return ErrNoPrivateKeyStoreRef
}
// Look for CA Certificates
if len(caCertsMatch) != 0 {
caPool, err := createCACertsPool(cs, scope, caCertsMatch, skipInvalid)
if err != nil {
return err
}
config.ClientCAs = caPool
}
} else {
return ErrBadCertStore
}
@@ -278,7 +329,7 @@ func winFindCert(store windows.Handle, enc, findFlags, findType uint32, para *ui
)
if h == 0 {
// Actual error, or simply not found?
if errno, ok := err.(syscall.Errno); ok && errno == winCryptENotFound {
if errno, ok := err.(syscall.Errno); ok && errno == syscall.Errno(winCryptENotFound) {
return nil, ErrFailedCertSearch
}
return nil, ErrFailedCertSearch
@@ -287,6 +338,16 @@ func winFindCert(store windows.Handle, enc, findFlags, findType uint32, para *ui
return (*windows.CertContext)(unsafe.Pointer(h)), nil
}
// winVerifyCertValid wraps the CertVerifyTimeValidity and simply returns true if the certificate is valid
func winVerifyCertValid(timeToVerify *windows.Filetime, certInfo *windows.CertInfo) bool {
// this function does not document returning errors / setting lasterror
r, _, _ := winCertVerifyTimeValidity.Call(
uintptr(unsafe.Pointer(timeToVerify)),
uintptr(unsafe.Pointer(certInfo)),
)
return r == 0
}
// winCertStore is a store implementation for the Windows Certificate Store
type winCertStore struct {
Prov uintptr
@@ -326,21 +387,70 @@ func winCertContextToX509(ctx *windows.CertContext) (*x509.Certificate, error) {
// CertContext pointer returned allows subsequent key operations like Sign. Caller specifies
// current user's personal certs or local machine's personal certs using storeType.
// See CERT_FIND_ISSUER_STR description at https://learn.microsoft.com/en-us/windows/win32/api/wincrypt/nf-wincrypt-certfindcertificateinstore
func (w *winCertStore) certByIssuer(issuer string, storeType uint32) (*x509.Certificate, *windows.CertContext, error) {
return w.certSearch(winFindIssuerStr, issuer, winMyStore, storeType)
func (w *winCertStore) certByIssuer(issuer string, storeType uint32, skipInvalid bool) (*x509.Certificate, *windows.CertContext, error) {
return w.certSearch(winFindIssuerStr, issuer, winMyStore, storeType, skipInvalid)
}
// certBySubject matches and returns the first certificate found by passed subject field.
// CertContext pointer returned allows subsequent key operations like Sign. Caller specifies
// current user's personal certs or local machine's personal certs using storeType.
// See CERT_FIND_SUBJECT_STR description at https://learn.microsoft.com/en-us/windows/win32/api/wincrypt/nf-wincrypt-certfindcertificateinstore
func (w *winCertStore) certBySubject(subject string, storeType uint32) (*x509.Certificate, *windows.CertContext, error) {
return w.certSearch(winFindSubjectStr, subject, winMyStore, storeType)
func (w *winCertStore) certBySubject(subject string, storeType uint32, skipInvalid bool) (*x509.Certificate, *windows.CertContext, error) {
return w.certSearch(winFindSubjectStr, subject, winMyStore, storeType, skipInvalid)
}
// certByThumbprint matches and returns the first certificate found by passed SHA1 thumbprint.
// CertContext pointer returned allows subsequent key operations like Sign. Caller specifies
// current user's personal certs or local machine's personal certs using storeType.
// See CERT_FIND_SUBJECT_STR description at https://learn.microsoft.com/en-us/windows/win32/api/wincrypt/nf-wincrypt-certfindcertificateinstore
func (w *winCertStore) certByThumbprint(hash string, storeType uint32, skipInvalid bool) (*x509.Certificate, *windows.CertContext, error) {
return w.certSearch(winFindHashStr, hash, winMyStore, storeType, skipInvalid)
}
// caCertsBySubjectMatch matches and returns all matching certificates of the subject field.
//
// The following locations are searched:
// 1) Root (Trusted Root Certification Authorities)
// 2) AuthRoot (Third-Party Root Certification Authorities)
// 3) CA (Intermediate Certification Authorities)
//
// Caller specifies current user's personal certs or local machine's personal certs using storeType.
// See CERT_FIND_SUBJECT_STR description at https://learn.microsoft.com/en-us/windows/win32/api/wincrypt/nf-wincrypt-certfindcertificateinstore
func (w *winCertStore) caCertsBySubjectMatch(subject string, storeType uint32, skipInvalid bool) ([]*x509.Certificate, error) {
var (
leaf *x509.Certificate
searchLocations = [3]*uint16{winRootStore, winAuthRootStore, winIntermediateCAStore}
rv []*x509.Certificate
)
// surprisingly, an empty string returns a result. We'll treat this as an error.
if subject == "" {
return nil, ErrBadCaCertMatchField
}
for _, sr := range searchLocations {
var err error
if leaf, _, err = w.certSearch(winFindSubjectStr, subject, sr, storeType, skipInvalid); err == nil {
rv = append(rv, leaf)
} else {
// Ignore the failed search from a single location. Errors we catch include
// ErrFailedX509Extract (resulting from a malformed certificate) and errors
// around invalid attributes, unsupported algorithms, etc. These are corner
// cases as certificates with these errors shouldn't have been allowed
// to be added to the store in the first place.
if err != ErrFailedCertSearch {
return nil, err
}
}
}
// Not found anywhere
if len(rv) == 0 {
return nil, ErrFailedCertSearch
}
return rv, nil
}
// certSearch is a helper function to lookup certificates based on search type and match value.
// store is used to specify which store to perform the lookup in (system or user).
func (w *winCertStore) certSearch(searchType uint32, matchValue string, searchRoot *uint16, store uint32) (*x509.Certificate, *windows.CertContext, error) {
func (w *winCertStore) certSearch(searchType uint32, matchValue string, searchRoot *uint16, store uint32, skipInvalid bool) (*x509.Certificate, *windows.CertContext, error) {
// store handle to "MY" store
h, err := w.storeHandle(store, searchRoot)
if err != nil {
@@ -357,23 +467,32 @@ func (w *winCertStore) certSearch(searchType uint32, matchValue string, searchRo
// pass 0 as the third parameter because it is not used
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa376064(v=vs.85).aspx
nc, err := winFindCert(h, winEncodingX509ASN|winEncodingPKCS7, 0, searchType, i, prev)
if err != nil {
return nil, nil, err
}
if nc != nil {
// certificate found
prev = nc
// Extract the DER-encoded certificate from the cert context
xc, err := winCertContextToX509(nc)
if err == nil {
cert = xc
} else {
return nil, nil, ErrFailedX509Extract
for {
nc, err := winFindCert(h, winEncodingX509ASN|winEncodingPKCS7, 0, searchType, i, prev)
if err != nil {
return nil, nil, err
}
if nc != nil {
// certificate found
prev = nc
var now *windows.Filetime
if skipInvalid && !winVerifyCertValid(now, nc.CertInfo) {
continue
}
// Extract the DER-encoded certificate from the cert context
xc, err := winCertContextToX509(nc)
if err == nil {
cert = xc
break
} else {
return nil, nil, ErrFailedX509Extract
}
} else {
return nil, nil, ErrFailedCertSearch
}
} else {
return nil, nil, ErrFailedCertSearch
}
if cert == nil {
@@ -396,7 +515,7 @@ func winNewStoreHandle(provider uint32, store *uint16) (*winStoreHandle, error)
winCertStoreProvSystem,
0,
0,
provider,
provider|winCertStoreReadOnly,
uintptr(unsafe.Pointer(store)))
if err != nil {
return nil, ErrBadCryptoStoreProvider

View File

@@ -68,6 +68,12 @@ var (
// ErrBadCertMatchField represents malformed cert_match option
ErrBadCertMatchField = errors.New("expected 'cert_match' to be a valid non-empty string")
// ErrBadCaCertMatchField represents malformed cert_match option
ErrBadCaCertMatchField = errors.New("expected 'ca_certs_match' to be a valid non-empty string array")
// ErrBadCertMatchSkipInvalidField represents malformed cert_match_skip_invalid option
ErrBadCertMatchSkipInvalidField = errors.New("expected 'cert_match_skip_invalid' to be a boolean")
// ErrOSNotCompatCertStore represents cert_store passed that exists but is not valid on current OS
ErrOSNotCompatCertStore = errors.New("cert_store not compatible with current operating system")
)

View File

@@ -261,6 +261,9 @@ type client struct {
last time.Time
lastIn time.Time
repliesSincePrune uint16
lastReplyPrune time.Time
headers bool
rtt time.Duration
@@ -420,6 +423,7 @@ const (
pruneSize = 32
routeTargetInit = 8
replyPermLimit = 4096
replyPruneTime = time.Second
)
// Represent read cache booleans with a bitmask
@@ -3526,9 +3530,11 @@ func (c *client) deliverMsg(prodIsMQTT bool, sub *subscription, acc *Account, su
// If we are tracking dynamic publish permissions that track reply subjects,
// do that accounting here. We only look at client.replies which will be non-nil.
if client.replies != nil && len(reply) > 0 {
// Only reply subject permissions if the client is not already allowed to publish to the reply subject.
if client.replies != nil && len(reply) > 0 && !client.pubAllowedFullCheck(string(reply), true, true) {
client.replies[string(reply)] = &resp{time.Now(), 0}
if len(client.replies) > replyPermLimit {
client.repliesSincePrune++
if client.repliesSincePrune > replyPermLimit || time.Since(client.lastReplyPrune) > replyPruneTime {
client.pruneReplyPerms()
}
}
@@ -3652,6 +3658,9 @@ func (c *client) pruneReplyPerms() {
delete(c.replies, k)
}
}
c.repliesSincePrune = 0
c.lastReplyPrune = now
}
// pruneDenyCache will prune the deny cache via randomly
@@ -3720,7 +3729,7 @@ func (c *client) pubAllowedFullCheck(subject string, fullCheck, hasLock bool) bo
allowed = np == 0
}
// If we are currently not allowed but we are tracking reply subjects
// If we are tracking reply subjects
// dynamically, check to see if we are allowed here but avoid pcache.
// We need to acquire the lock though.
if !allowed && fullCheck && c.perms.resp != nil {
@@ -4570,6 +4579,21 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
// Declared here because of goto.
var queues [][]byte
var leafOrigin string
switch c.kind {
case ROUTER:
if len(c.pa.origin) > 0 {
// Picture a message sent from a leafnode to a server that then routes
// this message: CluserA -leaf-> HUB1 -route-> HUB2
// Here we are in HUB2, so c.kind is a ROUTER, but the message will
// contain a c.pa.origin set to "ClusterA" to indicate that this message
// originated from that leafnode cluster.
leafOrigin = bytesToString(c.pa.origin)
}
case LEAF:
leafOrigin = c.remoteCluster()
}
// For all routes/leaf/gateway connections, we may still want to send messages to
// leaf nodes or routes even if there are no queue filters since we collect
// them above and do not process inline like normal clients.
@@ -4608,12 +4632,24 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
ql := _ql[:0]
for i := 0; i < len(qsubs); i++ {
sub = qsubs[i]
if sub.client.kind == LEAF || sub.client.kind == ROUTER {
// If we have assigned an rsub already, replace if the destination is a LEAF
// since we want to favor that compared to a ROUTER. We could make sure that
// we override only if previous was a ROUTE and not a LEAF, but we don't have to.
if rsub == nil || sub.client.kind == LEAF {
if dst := sub.client.kind; dst == LEAF || dst == ROUTER {
// If the destination is a LEAF, we first need to make sure
// that we would not pick one that was the origin of this
// message.
if dst == LEAF && leafOrigin != _EMPTY_ && leafOrigin == sub.client.remoteCluster() {
continue
}
// If we have assigned a ROUTER rsub already, replace if
// the destination is a LEAF since we want to favor that.
if rsub == nil || (rsub.client.kind == ROUTER && dst == LEAF) {
rsub = sub
} else if dst == LEAF {
// We already have a LEAF and this is another one.
// Flip a coin to see if we swap it or not.
// See https://github.com/nats-io/nats-server/issues/6040
if fastrand.Uint32()%2 == 1 {
rsub = sub
}
}
} else {
ql = append(ql, sub)
@@ -4629,6 +4665,8 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
}
// Find a subscription that is able to deliver this message starting at a random index.
// Note that if the message came from a ROUTER, we will only have CLIENT or LEAF
// queue subs here, otherwise we can have all types.
for i := 0; i < lqs; i++ {
if sindex+i < lqs {
sub = qsubs[sindex+i]
@@ -4649,20 +4687,38 @@ func (c *client) processMsgResults(acc *Account, r *SublistResult, msg, deliver,
// Here we just care about a client or leaf and skipping a leaf and preferring locals.
if dst := sub.client.kind; dst == ROUTER || dst == LEAF {
if (src == LEAF || src == CLIENT) && dst == LEAF {
// If we come from a LEAF and are about to pick a LEAF connection,
// make sure this is not the same leaf cluster.
if src == LEAF && leafOrigin != _EMPTY_ && leafOrigin == sub.client.remoteCluster() {
continue
}
// Remember that leaf in case we don't find any other candidate.
// We already start randomly in lqs slice, so we don't need
// to do a random swap if we already have an rsub like we do
// when src == ROUTER above.
if rsub == nil {
rsub = sub
}
continue
} else {
// We would be picking a route, but if we had remembered a "hub" leaf,
// then pick that one instead of the route.
if rsub != nil && rsub.client.kind == LEAF && rsub.client.isHubLeafNode() {
break
// We want to favor qsubs in our own cluster. If the routed
// qsub has an origin, it means that is on behalf of a leaf.
// We need to treat it differently.
if len(sub.origin) > 0 {
// If we already have an rsub, nothing to do. Also, do
// not pick a routed qsub for a LEAF origin cluster
// that is the same than where the message comes from.
if rsub == nil && (leafOrigin == _EMPTY_ || leafOrigin != bytesToString(sub.origin)) {
rsub = sub
}
continue
}
// This is a qsub that is local on the remote server (or
// we are connected to an older server and we don't know).
// Pick this one and be done.
rsub = sub
break
}
break
}
// Assume delivery subject is normal subject to this point.
@@ -4749,18 +4805,11 @@ sendToRoutesOrLeafs:
// If so make sure we do not send it back to the same cluster for a different
// leafnode. Cluster wide no echo.
if dc.kind == LEAF {
// Check two scenarios. One is inbound from a route (c.pa.origin)
if c.kind == ROUTER && len(c.pa.origin) > 0 {
if bytesToString(c.pa.origin) == dc.remoteCluster() {
continue
}
}
// The other is leaf to leaf.
if c.kind == LEAF {
src, dest := c.remoteCluster(), dc.remoteCluster()
if src != _EMPTY_ && src == dest {
continue
}
// Check two scenarios. One is inbound from a route (c.pa.origin),
// and the other is leaf to leaf. In both case, leafOrigin is the one
// to use for the comparison.
if leafOrigin != _EMPTY_ && leafOrigin == dc.remoteCluster() {
continue
}
// We need to check if this is a request that has a stamped client information header.

View File

@@ -55,7 +55,7 @@ func init() {
const (
// VERSION is the current version for the server.
VERSION = "2.10.22"
VERSION = "2.10.24"
// PROTO is the currently supported protocol.
// 0 was the original
@@ -171,6 +171,9 @@ const (
// MAX_HPUB_ARGS Maximum possible number of arguments from HPUB proto.
MAX_HPUB_ARGS = 4
// MAX_RSUB_ARGS Maximum possible number of arguments from a RS+/LS+ proto.
MAX_RSUB_ARGS = 6
// DEFAULT_MAX_CLOSED_CLIENTS is the maximum number of closed connections we hold onto.
DEFAULT_MAX_CLOSED_CLIENTS = 10000

View File

@@ -345,6 +345,7 @@ type consumer struct {
outq *jsOutQ
pending map[uint64]*Pending
ptmr *time.Timer
ptmrEnd time.Time
rdq []uint64
rdqi avl.SequenceSet
rdc map[uint64]uint64
@@ -504,7 +505,7 @@ func checkConsumerCfg(
}
// Check if we have a BackOff defined that MaxDeliver is within range etc.
if lbo := len(config.BackOff); lbo > 0 && config.MaxDeliver != -1 && config.MaxDeliver <= lbo {
if lbo := len(config.BackOff); lbo > 0 && config.MaxDeliver != -1 && lbo > config.MaxDeliver {
return NewJSConsumerMaxDeliverBackoffError()
}
@@ -950,7 +951,7 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri
// If we have multiple filter subjects, create a sublist which we will use
// in calling store.LoadNextMsgMulti.
if len(o.cfg.FilterSubjects) > 0 {
o.filters = NewSublistWithCache()
o.filters = NewSublistNoCache()
for _, filter := range o.cfg.FilterSubjects {
o.filters.Insert(&subscription{subject: []byte(filter)})
}
@@ -1349,7 +1350,7 @@ func (o *consumer) setLeader(isLeader bool) {
stopAndClearTimer(&o.dtmr)
// Make sure to clear out any re-deliver queues
stopAndClearTimer(&o.ptmr)
o.stopAndClearPtmr()
o.rdq = nil
o.rdqi.Empty()
o.pending = nil
@@ -1562,6 +1563,16 @@ func (o *consumer) updateDeliveryInterest(localInterest bool) bool {
return false
}
// Default retry intervals used by deleteNotActive when a consumer
// assignment is not cleaned up and the removal proposal must be retried
// with exponential backoff (see the retry loop in deleteNotActive).
const (
	defaultConsumerNotActiveStartInterval = 30 * time.Second
	defaultConsumerNotActiveMaxInterval   = 5 * time.Minute
)

// Mutable copies of the defaults above; presumably declared as variables
// so tests can shorten the retry intervals — TODO confirm against tests.
var (
	consumerNotActiveStartInterval = defaultConsumerNotActiveStartInterval
	consumerNotActiveMaxInterval   = defaultConsumerNotActiveMaxInterval
)
func (o *consumer) deleteNotActive() {
o.mu.Lock()
if o.mset == nil {
@@ -1627,12 +1638,8 @@ func (o *consumer) deleteNotActive() {
// Check to make sure we went away.
// Don't think this needs to be a monitored go routine.
go func() {
const (
startInterval = 30 * time.Second
maxInterval = 5 * time.Minute
)
jitter := time.Duration(rand.Int63n(int64(startInterval)))
interval := startInterval + jitter
jitter := time.Duration(rand.Int63n(int64(consumerNotActiveStartInterval)))
interval := consumerNotActiveStartInterval + jitter
ticker := time.NewTicker(interval)
defer ticker.Stop()
for range ticker.C {
@@ -1647,7 +1654,7 @@ func (o *consumer) deleteNotActive() {
if nca != nil && nca == ca {
s.Warnf("Consumer assignment for '%s > %s > %s' not cleaned up, retrying", acc, stream, name)
meta.ForwardProposal(removeEntry)
if interval < maxInterval {
if interval < consumerNotActiveMaxInterval {
interval *= 2
ticker.Reset(interval)
}
@@ -1739,7 +1746,7 @@ func (o *consumer) forceExpirePending() {
p.Timestamp += off
}
}
o.ptmr.Reset(o.ackWait(0))
o.resetPtmr(o.ackWait(0))
}
o.signalNewMessages()
}
@@ -1842,7 +1849,7 @@ func (acc *Account) checkNewConsumerConfig(cfg, ncfg *ConsumerConfig) error {
}
// Check if BackOff is defined, MaxDeliver is within range.
if lbo := len(ncfg.BackOff); lbo > 0 && ncfg.MaxDeliver != -1 && ncfg.MaxDeliver <= lbo {
if lbo := len(ncfg.BackOff); lbo > 0 && ncfg.MaxDeliver != -1 && lbo > ncfg.MaxDeliver {
return NewJSConsumerMaxDeliverBackoffError()
}
@@ -1882,7 +1889,7 @@ func (o *consumer) updateConfig(cfg *ConsumerConfig) error {
// AckWait
if cfg.AckWait != o.cfg.AckWait {
if o.ptmr != nil {
o.ptmr.Reset(100 * time.Millisecond)
o.resetPtmr(100 * time.Millisecond)
}
}
// Rate Limit
@@ -1940,7 +1947,7 @@ func (o *consumer) updateConfig(cfg *ConsumerConfig) error {
if len(o.subjf) == 1 {
o.filters = nil
} else {
o.filters = NewSublistWithCache()
o.filters = NewSublistNoCache()
for _, filter := range o.subjf {
o.filters.Insert(&subscription{subject: []byte(filter.subject)})
}
@@ -2205,9 +2212,7 @@ func (o *consumer) updateDelivered(dseq, sseq, dc uint64, ts int64) {
n += binary.PutUvarint(b[n:], dc)
n += binary.PutVarint(b[n:], ts)
o.propose(b[:n])
}
if o.store != nil {
// Update local state always.
} else if o.store != nil {
o.store.UpdateDelivered(dseq, sseq, dc, ts)
}
// Update activity.
@@ -2413,7 +2418,7 @@ func (o *consumer) processNak(sseq, dseq, dc uint64, nak []byte) {
if o.ptmr != nil {
// Want checkPending to run and figure out the next timer ttl.
// TODO(dlc) - We could optimize this maybe a bit more and track when we expect the timer to fire.
o.ptmr.Reset(10 * time.Millisecond)
o.resetPtmr(10 * time.Millisecond)
}
}
// Nothing else for use to do now so return.
@@ -2547,11 +2552,7 @@ func (o *consumer) applyState(state *ConsumerState) {
if o.cfg.AckWait < delay {
delay = o.ackWait(0)
}
if o.ptmr == nil {
o.ptmr = time.AfterFunc(delay, o.checkPending)
} else {
o.ptmr.Reset(delay)
}
o.resetPtmr(delay)
}
}
@@ -2666,23 +2667,20 @@ func (o *consumer) infoWithSnapAndReply(snap bool, reply string) *ConsumerInfo {
TimeStamp: time.Now().UTC(),
}
// If we are replicated and we are not the leader or we are filtered, we need to pull certain data from our store.
isLeader := o.isLeader()
if rg != nil && rg.node != nil && o.store != nil && (!isLeader || o.isFiltered()) {
// If we are replicated, we need to pull certain data from our store.
if rg != nil && rg.node != nil && o.store != nil {
state, err := o.store.BorrowState()
if err != nil {
o.mu.Unlock()
return nil
}
if !isLeader {
info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream
info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream
// If we are the leader we could have o.sseq that is skipped ahead.
// To maintain consistency in reporting (e.g. jsz) we always take the state for our delivered/ackfloor stream sequence.
info.Delivered.Consumer, info.Delivered.Stream = state.Delivered.Consumer, state.Delivered.Stream
info.AckFloor.Consumer, info.AckFloor.Stream = state.AckFloor.Consumer, state.AckFloor.Stream
if !o.isLeader() {
info.NumAckPending = len(state.Pending)
info.NumRedelivered = len(state.Redelivered)
} else {
// Since we are filtered and we are the leader we could have o.sseq that is skipped ahead.
// To maintain consistency in reporting (e.g. jsz) we take the state for our delivered stream sequence.
info.Delivered.Stream = state.Delivered.Stream
}
}
@@ -2786,18 +2784,30 @@ func (o *consumer) processAckMsg(sseq, dseq, dc uint64, reply string, doSample b
return false
}
// Check if this ack is above the current pointer to our next to deliver.
// This could happen on a cooperative takeover with high speed deliveries.
if sseq >= o.sseq {
o.sseq = sseq + 1
}
mset := o.mset
if mset == nil || mset.closed.Load() {
o.mu.Unlock()
return false
}
// Check if this ack is above the current pointer to our next to deliver.
// This could happen on a cooperative takeover with high speed deliveries.
if sseq >= o.sseq {
// Let's make sure this is valid.
// This is only received on the consumer leader, so should never be higher
// than the last stream sequence.
var ss StreamState
mset.store.FastState(&ss)
if sseq > ss.LastSeq {
o.srv.Warnf("JetStream consumer '%s > %s > %s' ACK sequence %d past last stream sequence of %d",
o.acc.Name, o.stream, o.name, sseq, ss.LastSeq)
// FIXME(dlc) - For 2.11 onwards should we return an error here to the caller?
o.mu.Unlock()
return false
}
o.sseq = sseq + 1
}
// Let the owning stream know if we are interest or workqueue retention based.
// If this consumer is clustered (o.node != nil) this will be handled by
// processReplicatedAck after the ack has propagated.
@@ -3011,6 +3021,14 @@ func (o *consumer) needAck(sseq uint64, subj string) bool {
return needAck
}
// jsGetNextPool recycles JSApiConsumerGetNextRequest values for
// nextReqFromMsg: json.Unmarshal always makes the request struct escape
// to the heap, so pooling the structs reduces GC pressure on the hot
// next-message request path.
var jsGetNextPool = sync.Pool{
	New: func() any {
		return &JSApiConsumerGetNextRequest{}
	},
}
// Helper for the next message requests.
func nextReqFromMsg(msg []byte) (time.Time, int, int, bool, time.Duration, time.Time, error) {
req := bytes.TrimSpace(msg)
@@ -3020,7 +3038,11 @@ func nextReqFromMsg(msg []byte) (time.Time, int, int, bool, time.Duration, time.
return time.Time{}, 1, 0, false, 0, time.Time{}, nil
case req[0] == '{':
var cr JSApiConsumerGetNextRequest
cr := jsGetNextPool.Get().(*JSApiConsumerGetNextRequest)
defer func() {
*cr = JSApiConsumerGetNextRequest{}
jsGetNextPool.Put(cr)
}()
if err := json.Unmarshal(req, &cr); err != nil {
return time.Time{}, -1, 0, false, 0, time.Time{}, err
}
@@ -3420,6 +3442,7 @@ func (o *consumer) processNextMsgRequest(reply string, msg []byte) {
if err := o.waiting.add(wr); err != nil {
sendErr(409, "Exceeded MaxWaiting")
wr.recycle()
return
}
o.signalNewMessages()
@@ -3625,7 +3648,7 @@ func (o *consumer) getNextMsg() (*jsPubMsg, uint64, error) {
// Check if we are multi-filtered or not.
if filters != nil {
sm, sseq, err = store.LoadNextMsgMulti(filters, fseq, &pmsg.StoreMsg)
} else if subjf != nil { // Means single filtered subject since o.filters means > 1.
} else if len(subjf) > 0 { // Means single filtered subject since o.filters means > 1.
filter, wc := subjf[0].subject, subjf[0].hasWildcard
sm, sseq, err = store.LoadNextMsg(filter, wc, fseq, &pmsg.StoreMsg)
} else {
@@ -3817,7 +3840,7 @@ func (o *consumer) checkAckFloor() {
// We will set it explicitly to 1 behind our current lowest in pending, or if
// pending is empty, to our current delivered -1.
const minOffThreshold = 50
if o.asflr < ss.FirstSeq-minOffThreshold {
if ss.FirstSeq >= minOffThreshold && o.asflr < ss.FirstSeq-minOffThreshold {
var psseq, pdseq uint64
for seq, p := range o.pending {
if psseq == 0 || seq < psseq {
@@ -4270,37 +4293,15 @@ func (o *consumer) calculateNumPending() (npc, npf uint64) {
}
isLastPerSubject := o.cfg.DeliverPolicy == DeliverLastPerSubject
filters, subjf := o.filters, o.subjf
// Deliver Last Per Subject calculates num pending differently.
if isLastPerSubject {
// Consumer without filters.
if o.subjf == nil {
return o.mset.store.NumPending(o.sseq, _EMPTY_, isLastPerSubject)
}
// Consumer with filters.
for _, filter := range o.subjf {
lnpc, lnpf := o.mset.store.NumPending(o.sseq, filter.subject, isLastPerSubject)
npc += lnpc
if lnpf > npf {
npf = lnpf // Always last
}
}
return npc, npf
if filters != nil {
return o.mset.store.NumPendingMulti(o.sseq, filters, isLastPerSubject)
} else if len(subjf) > 0 {
filter := subjf[0].subject
return o.mset.store.NumPending(o.sseq, filter, isLastPerSubject)
}
// Every other Delivery Policy is handled here.
// Consumer without filters.
if o.subjf == nil {
return o.mset.store.NumPending(o.sseq, _EMPTY_, false)
}
// Consumer with filters.
for _, filter := range o.subjf {
lnpc, lnpf := o.mset.store.NumPending(o.sseq, filter.subject, false)
npc += lnpc
if lnpf > npf {
npf = lnpf // Always last
}
}
return npc, npf
return o.mset.store.NumPending(o.sseq, _EMPTY_, isLastPerSubject)
}
func convertToHeadersOnly(pmsg *jsPubMsg) {
@@ -4465,9 +4466,24 @@ func (o *consumer) trackPending(sseq, dseq uint64) {
if o.pending == nil {
o.pending = make(map[uint64]*Pending)
}
if o.ptmr == nil {
o.ptmr = time.AfterFunc(o.ackWait(0), o.checkPending)
// We could have a backoff that set a timer higher than what we need for this message.
// In that case, reset to lowest backoff required for a message redelivery.
minDelay := o.ackWait(0)
if l := len(o.cfg.BackOff); l > 0 {
bi := int(o.rdc[sseq])
if bi < 0 {
bi = 0
} else if bi >= l {
bi = l - 1
}
minDelay = o.ackWait(o.cfg.BackOff[bi])
}
minDeadline := time.Now().Add(minDelay)
if o.ptmr == nil || o.ptmrEnd.After(minDeadline) {
o.resetPtmr(minDelay)
}
if p, ok := o.pending[sseq]; ok {
// Update timestamp but keep original consumer delivery sequence.
// So do not update p.Sequence.
@@ -4590,24 +4606,21 @@ func (o *consumer) removeFromRedeliverQueue(seq uint64) bool {
// Checks the pending messages.
func (o *consumer) checkPending() {
o.mu.RLock()
o.mu.Lock()
defer o.mu.Unlock()
mset := o.mset
// On stop, mset and timer will be nil.
if o.closed || mset == nil || o.ptmr == nil {
stopAndClearTimer(&o.ptmr)
o.mu.RUnlock()
o.stopAndClearPtmr()
return
}
o.mu.RUnlock()
var shouldUpdateState bool
var state StreamState
mset.store.FastState(&state)
fseq := state.FirstSeq
o.mu.Lock()
defer o.mu.Unlock()
now := time.Now().UnixNano()
ttl := int64(o.cfg.AckWait)
next := int64(o.ackWait(0))
@@ -4623,11 +4636,7 @@ func (o *consumer) checkPending() {
check := len(o.pending) > 1024
for seq, p := range o.pending {
if check && atomic.LoadInt64(&o.awl) > 0 {
if o.ptmr == nil {
o.ptmr = time.AfterFunc(100*time.Millisecond, o.checkPending)
} else {
o.ptmr.Reset(100 * time.Millisecond)
}
o.resetPtmr(100 * time.Millisecond)
return
}
// Check if these are no longer valid.
@@ -4694,15 +4703,10 @@ func (o *consumer) checkPending() {
}
if len(o.pending) > 0 {
delay := time.Duration(next)
if o.ptmr == nil {
o.ptmr = time.AfterFunc(delay, o.checkPending)
} else {
o.ptmr.Reset(o.ackWait(delay))
}
o.resetPtmr(time.Duration(next))
} else {
// Make sure to stop timer and clear out any re delivery queues
stopAndClearTimer(&o.ptmr)
o.stopAndClearPtmr()
o.rdq = nil
o.rdqi.Empty()
o.pending = nil
@@ -4890,7 +4894,7 @@ func (o *consumer) selectStartingSeqNo() {
for _, filter := range o.subjf {
// Use first sequence since this is more optimized atm.
ss := o.mset.store.FilteredState(state.FirstSeq, filter.subject)
if ss.First > o.sseq && ss.First < nseq {
if ss.First >= o.sseq && ss.First < nseq {
nseq = ss.First
}
}
@@ -5188,7 +5192,7 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error {
o.client = nil
sysc := o.sysc
o.sysc = nil
stopAndClearTimer(&o.ptmr)
o.stopAndClearPtmr()
stopAndClearTimer(&o.dtmr)
stopAndClearTimer(&o.gwdtmr)
delivery := o.cfg.DeliverSubject
@@ -5242,12 +5246,6 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error {
if dflag {
n.Delete()
} else {
// Try to install snapshot on clean exit
if o.store != nil && (o.retention != LimitsPolicy || n.NeedSnapshot()) {
if snap, err := o.store.EncodedState(); err == nil {
n.InstallSnapshot(snap)
}
}
n.Stop()
}
}
@@ -5329,12 +5327,14 @@ func (o *consumer) cleanupNoInterestMessages(mset *stream, ignoreInterest bool)
return
}
mset.mu.RUnlock()
mset.mu.Lock()
for seq := start; seq <= stop; seq++ {
if mset.noInterest(seq, co) {
rmseqs = append(rmseqs, seq)
}
}
mset.mu.RUnlock()
mset.mu.Unlock()
// These can be removed.
for _, seq := range rmseqs {
@@ -5590,8 +5590,9 @@ func (o *consumer) checkStateForInterestStream(ss *StreamState) error {
o.mu.Lock()
// Update our check floor.
if seq > o.chkflr {
o.chkflr = seq
// Check floor must never be greater than ack floor+1, otherwise subsequent calls to this function would skip work.
if asflr+1 > o.chkflr {
o.chkflr = asflr + 1
}
// See if we need to process this update if our parent stream is not a limits policy stream.
state, _ = o.store.State()
@@ -5610,3 +5611,17 @@ func (o *consumer) checkStateForInterestStream(ss *StreamState) error {
}
return nil
}
// resetPtmr arms (or re-arms) the pending-check timer so that
// checkPending fires after delay, and records the expected expiration
// time in o.ptmrEnd. Lock must be held by the caller.
func (o *consumer) resetPtmr(delay time.Duration) {
	if o.ptmr != nil {
		o.ptmr.Reset(delay)
	} else {
		o.ptmr = time.AfterFunc(delay, o.checkPending)
	}
	o.ptmrEnd = time.Now().Add(delay)
}
// stopAndClearPtmr stops and releases the pending-check timer (if set)
// and zeroes the recorded expiration time so a later resetPtmr starts
// from a clean state.
func (o *consumer) stopAndClearPtmr() {
	stopAndClearTimer(&o.ptmr)
	o.ptmrEnd = time.Time{}
}

View File

@@ -315,6 +315,15 @@ type ClientInfo struct {
Nonce string `json:"nonce,omitempty"`
}
// forAssignmentSnap returns a trimmed copy of ci carrying only the
// fields required for assignment snapshots: Account, Service and
// Cluster. All other fields are left at their zero values.
func (ci *ClientInfo) forAssignmentSnap() *ClientInfo {
	snap := new(ClientInfo)
	snap.Account = ci.Account
	snap.Service = ci.Service
	snap.Cluster = ci.Cluster
	return snap
}
// ServerStats hold various statistics that we will periodically send out.
type ServerStats struct {
Start time.Time `json:"start"`
@@ -1938,7 +1947,9 @@ type ServerAPIResponse struct {
compress compressionType
}
// Specialized response types for unmarshalling.
// Specialized response types for unmarshalling. These structures are not
// used in the server code and only there for users of the Z endpoints to
// unmarshal the data without having to create these structs in their code
// ServerAPIConnzResponse is the response type connz
type ServerAPIConnzResponse struct {
@@ -1947,6 +1958,69 @@ type ServerAPIConnzResponse struct {
Error *ApiError `json:"error,omitempty"`
}
// ServerAPIRoutezResponse is the response type for the routez endpoint.
// Not used by the server itself; provided so consumers of the Z
// endpoints can unmarshal responses without declaring this struct.
type ServerAPIRoutezResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Routez     `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPIGatewayzResponse is the response type for the gatewayz
// endpoint. Not used by the server itself; provided for unmarshalling
// by consumers of the Z endpoints.
type ServerAPIGatewayzResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Gatewayz   `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPIJszResponse is the response type for the jsz endpoint.
// Not used by the server itself; provided for unmarshalling by
// consumers of the Z endpoints.
type ServerAPIJszResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *JSInfo     `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPIHealthzResponse is the response type for the healthz
// endpoint. Not used by the server itself; provided for unmarshalling
// by consumers of the Z endpoints.
type ServerAPIHealthzResponse struct {
	Server *ServerInfo   `json:"server"`
	Data   *HealthStatus `json:"data,omitempty"`
	Error  *ApiError     `json:"error,omitempty"`
}
// ServerAPIVarzResponse is the response type for the varz endpoint.
// Not used by the server itself; provided for unmarshalling by
// consumers of the Z endpoints.
type ServerAPIVarzResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Varz       `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPISubszResponse is the response type for the subsz endpoint.
// Not used by the server itself; provided for unmarshalling by
// consumers of the Z endpoints.
type ServerAPISubszResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Subsz      `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPILeafzResponse is the response type for the leafz endpoint.
// Not used by the server itself; provided for unmarshalling by
// consumers of the Z endpoints.
type ServerAPILeafzResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Leafz      `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPIAccountzResponse is the response type for the accountz
// endpoint. Not used by the server itself; provided for unmarshalling
// by consumers of the Z endpoints.
type ServerAPIAccountzResponse struct {
	Server *ServerInfo `json:"server"`
	Data   *Accountz   `json:"data,omitempty"`
	Error  *ApiError   `json:"error,omitempty"`
}
// ServerAPIExpvarzResponse is the response type for the expvarz
// endpoint. Not used by the server itself; provided for unmarshalling
// by consumers of the Z endpoints.
type ServerAPIExpvarzResponse struct {
	Server *ServerInfo    `json:"server"`
	Data   *ExpvarzStatus `json:"data,omitempty"`
	Error  *ApiError      `json:"error,omitempty"`
}
// statszReq is a request for us to respond with current statsz.
func (s *Server) statszReq(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) {
if !s.EventsEnabled() {

View File

@@ -29,6 +29,7 @@ import (
"io"
"io/fs"
"math"
mrand "math/rand"
"net"
"os"
"path/filepath"
@@ -579,6 +580,9 @@ func (fs *fileStore) UpdateConfig(cfg *StreamConfig) error {
if cfg.Storage != FileStorage {
return fmt.Errorf("fileStore requires file storage type in config")
}
if cfg.MaxMsgsPer < -1 {
cfg.MaxMsgsPer = -1
}
fs.mu.Lock()
new_cfg := FileStreamInfo{Created: fs.cfg.Created, StreamConfig: *cfg}
@@ -609,7 +613,7 @@ func (fs *fileStore) UpdateConfig(cfg *StreamConfig) error {
fs.ageChk = nil
}
if fs.cfg.MaxMsgsPer > 0 && fs.cfg.MaxMsgsPer < old_cfg.MaxMsgsPer {
if fs.cfg.MaxMsgsPer > 0 && (old_cfg.MaxMsgsPer == 0 || fs.cfg.MaxMsgsPer < old_cfg.MaxMsgsPer) {
fs.enforceMsgPerSubjectLimit(true)
}
fs.mu.Unlock()
@@ -1739,6 +1743,7 @@ func (fs *fileStore) recoverFullState() (rerr error) {
var matched bool
mb := fs.lmb
if mb == nil || mb.index != blkIndex {
os.Remove(fn)
fs.warn("Stream state block does not exist or index mismatch")
return errCorruptState
}
@@ -1777,6 +1782,14 @@ func (fs *fileStore) recoverFullState() (rerr error) {
}
}
// We check first and last seq and number of msgs and bytes. If there is a difference,
// return and error so we rebuild from the message block state on disk.
if !trackingStatesEqual(&fs.state, &mstate) {
os.Remove(fn)
fs.warn("Stream state encountered internal inconsistency on recover")
return errCorruptState
}
return nil
}
@@ -2809,7 +2822,9 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
_tsa, _fsa := [32]string{}, [32]string{}
tsa, fsa := _tsa[:0], _fsa[:0]
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
if wc {
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
}
isMatch := func(subj string) bool {
if isAll {
@@ -2903,7 +2918,6 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
mb := fs.blks[i]
// Hold write lock in case we need to load cache.
mb.mu.Lock()
var t uint64
if isAll && sseq <= atomic.LoadUint64(&mb.first.seq) {
total += mb.msgs
mb.mu.Unlock()
@@ -2918,6 +2932,7 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
// Mark fss activity.
mb.lsts = time.Now().UnixNano()
var t uint64
var havePartial bool
mb.fss.Match(stringToBytes(filter), func(bsubj []byte, ss *SimpleState) {
if havePartial {
@@ -2945,8 +2960,12 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
}
// Clear on partial.
t = 0
start := sseq
if fseq := atomic.LoadUint64(&mb.first.seq); fseq > start {
start = fseq
}
var smv StoreMsg
for seq, lseq := sseq, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
for seq, lseq := start, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && isMatch(sm.subj) {
t++
}
@@ -3051,6 +3070,296 @@ func (fs *fileStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
return total, validThrough
}
// NumPending will return the number of pending messages matching any subject in the sublist starting at sequence.
// Optimized for stream num pending calculations for consumers with lots of filtered subjects.
// Subjects should not overlap, this property is held when doing multi-filtered consumers.
func (fs *fileStore) NumPendingMulti(sseq uint64, sl *Sublist, lastPerSubject bool) (total, validThrough uint64) {
fs.mu.RLock()
defer fs.mu.RUnlock()
// This can always be last for these purposes.
validThrough = fs.state.LastSeq
if fs.state.Msgs == 0 || sseq > fs.state.LastSeq {
return 0, validThrough
}
// If sseq is less then our first set to first.
if sseq < fs.state.FirstSeq {
sseq = fs.state.FirstSeq
}
// Track starting for both block for the sseq and staring block that matches any subject.
var seqStart int
// See if we need to figure out starting block per sseq.
if sseq > fs.state.FirstSeq {
// This should not, but can return -1, so make sure we check to avoid panic below.
if seqStart, _ = fs.selectMsgBlockWithIndex(sseq); seqStart < 0 {
seqStart = 0
}
}
isAll := sl == nil
// See if filter was provided but its the only subject.
if !isAll && fs.psim.Size() == 1 {
fs.psim.Iter(func(subject []byte, _ *psi) bool {
isAll = sl.HasInterest(bytesToString(subject))
return true
})
}
// If we are isAll and have no deleted we can do a simpler calculation.
if !lastPerSubject && isAll && (fs.state.LastSeq-fs.state.FirstSeq+1) == fs.state.Msgs {
if sseq == 0 {
return fs.state.Msgs, validThrough
}
return fs.state.LastSeq - sseq + 1, validThrough
}
// Setup the isMatch function.
isMatch := func(subj string) bool {
if isAll {
return true
}
return sl.HasInterest(subj)
}
// Handle last by subject a bit differently.
// We will scan PSIM since we accurately track the last block we have seen the subject in. This
// allows us to only need to load at most one block now.
// For the last block, we need to track the subjects that we know are in that block, and track seen
// while in the block itself, but complexity there worth it.
if lastPerSubject {
// If we want all and our start sequence is equal or less than first return number of subjects.
if isAll && sseq <= fs.state.FirstSeq {
return uint64(fs.psim.Size()), validThrough
}
// If we are here we need to scan. We are going to scan the PSIM looking for lblks that are >= seqStart.
// This will build up a list of all subjects from the selected block onward.
lbm := make(map[string]bool)
mb := fs.blks[seqStart]
bi := mb.index
subs := make([]*subscription, 0, sl.Count())
sl.All(&subs)
for _, sub := range subs {
fs.psim.Match(sub.subject, func(subj []byte, psi *psi) {
// If the select blk start is greater than entry's last blk skip.
if bi > psi.lblk {
return
}
total++
// We will track the subjects that are an exact match to the last block.
// This is needed for last block processing.
if psi.lblk == bi {
lbm[string(subj)] = true
}
})
}
// Now check if we need to inspect the seqStart block.
// Grab write lock in case we need to load in msgs.
mb.mu.Lock()
var shouldExpire bool
// We need to walk this block to correct accounting from above.
if sseq > mb.first.seq {
// Track the ones we add back in case more than one.
seen := make(map[string]bool)
// We need to discount the total by subjects seen before sseq, but also add them right back in if they are >= sseq for this blk.
// This only should be subjects we know have the last blk in this block.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
var smv StoreMsg
for seq, lseq := atomic.LoadUint64(&mb.first.seq), atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
sm, _ := mb.cacheLookup(seq, &smv)
if sm == nil || sm.subj == _EMPTY_ || !lbm[sm.subj] {
continue
}
if isMatch(sm.subj) {
// If less than sseq adjust off of total as long as this subject matched the last block.
if seq < sseq {
if !seen[sm.subj] {
total--
seen[sm.subj] = true
}
} else if seen[sm.subj] {
// This is equal or more than sseq, so add back in.
total++
// Make sure to not process anymore.
delete(seen, sm.subj)
}
}
}
}
// If we loaded the block try to force expire.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
return total, validThrough
}
// If we would need to scan more from the beginning, revert back to calculating directly here.
if seqStart >= (len(fs.blks) / 2) {
for i := seqStart; i < len(fs.blks); i++ {
var shouldExpire bool
mb := fs.blks[i]
// Hold write lock in case we need to load cache.
mb.mu.Lock()
if isAll && sseq <= atomic.LoadUint64(&mb.first.seq) {
total += mb.msgs
mb.mu.Unlock()
continue
}
// If we are here we need to at least scan the subject fss.
// Make sure we have fss loaded.
if mb.fssNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
// Mark fss activity.
mb.lsts = time.Now().UnixNano()
var t uint64
var havePartial bool
IntersectStree[SimpleState](mb.fss, sl, func(bsubj []byte, ss *SimpleState) {
subj := bytesToString(bsubj)
if havePartial {
// If we already found a partial then don't do anything else.
return
}
if ss.firstNeedsUpdate {
mb.recalculateFirstForSubj(subj, ss.First, ss)
}
if sseq <= ss.First {
t += ss.Msgs
} else if sseq <= ss.Last {
// We matched but its a partial.
havePartial = true
}
})
// See if we need to scan msgs here.
if havePartial {
// Make sure we have the cache loaded.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
// Clear on partial.
t = 0
start := sseq
if fseq := atomic.LoadUint64(&mb.first.seq); fseq > start {
start = fseq
}
var smv StoreMsg
for seq, lseq := start, atomic.LoadUint64(&mb.last.seq); seq <= lseq; seq++ {
if sm, _ := mb.cacheLookup(seq, &smv); sm != nil && isMatch(sm.subj) {
t++
}
}
}
// If we loaded this block for this operation go ahead and expire it here.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
total += t
}
return total, validThrough
}
// If we are here it's better to calculate totals from psim and adjust downward by scanning less blocks.
start := uint32(math.MaxUint32)
subs := make([]*subscription, 0, sl.Count())
sl.All(&subs)
for _, sub := range subs {
fs.psim.Match(sub.subject, func(_ []byte, psi *psi) {
total += psi.total
// Keep track of start index for this subject.
if psi.fblk < start {
start = psi.fblk
}
})
}
// See if we were asked for all, if so we are done.
if sseq <= fs.state.FirstSeq {
return total, validThrough
}
// If we are here we need to calculate partials for the first blocks.
firstSubjBlk := fs.bim[start]
var firstSubjBlkFound bool
// Adjust in case not found.
if firstSubjBlk == nil {
firstSubjBlkFound = true
}
// Track how many we need to adjust against the total.
var adjust uint64
for i := 0; i <= seqStart; i++ {
mb := fs.blks[i]
// We can skip blks if we know they are below the first one that has any subject matches.
if !firstSubjBlkFound {
if firstSubjBlkFound = (mb == firstSubjBlk); !firstSubjBlkFound {
continue
}
}
// We need to scan this block.
var shouldExpire bool
mb.mu.Lock()
// Check if we should include all of this block in adjusting. If so work with metadata.
if sseq > atomic.LoadUint64(&mb.last.seq) {
if isAll {
adjust += mb.msgs
} else {
// We need to adjust for all matches in this block.
// Make sure we have fss loaded. This loads whole block now.
if mb.fssNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
// Mark fss activity.
mb.lsts = time.Now().UnixNano()
IntersectStree(mb.fss, sl, func(bsubj []byte, ss *SimpleState) {
adjust += ss.Msgs
})
}
} else {
// This is the last block. We need to scan per message here.
if mb.cacheNotLoaded() {
mb.loadMsgsWithLock()
shouldExpire = true
}
var last = atomic.LoadUint64(&mb.last.seq)
if sseq < last {
last = sseq
}
// We need to walk all messages in this block
var smv StoreMsg
for seq := atomic.LoadUint64(&mb.first.seq); seq < last; seq++ {
sm, _ := mb.cacheLookup(seq, &smv)
if sm == nil || sm.subj == _EMPTY_ {
continue
}
// Check if it matches our filter.
if sm.seq < sseq && isMatch(sm.subj) {
adjust++
}
}
}
// If we loaded the block try to force expire.
if shouldExpire {
mb.tryForceExpireCacheLocked()
}
mb.mu.Unlock()
}
// Make final adjustment.
total -= adjust
return total, validThrough
}
// SubjectsTotals returns message totals per subject.
func (fs *fileStore) SubjectsTotals(filter string) map[string]uint64 {
fs.mu.RLock()
@@ -7259,16 +7568,22 @@ func (fs *fileStore) reset() error {
}
// Return all active tombstones in this msgBlock.
// Acquires the block lock itself; callers must NOT hold mb.mu.
// The actual scan is delegated to tombsLocked.
func (mb *msgBlock) tombs() []msgId {
	mb.mu.Lock()
	defer mb.mu.Unlock()
	return mb.tombsLocked()
}
if !mb.cacheAlreadyLoaded() {
// Return all active tombstones in this msgBlock.
// Write lock should be held.
func (mb *msgBlock) tombsLocked() []msgId {
if mb.cacheNotLoaded() {
if err := mb.loadMsgsWithLock(); err != nil {
return nil
}
}
var tombs []msgId
var le = binary.LittleEndian
buf := mb.cache.buf
@@ -7349,7 +7664,7 @@ func (fs *fileStore) Truncate(seq uint64) error {
for mb := getLastMsgBlock(); mb != nlmb; mb = getLastMsgBlock() {
mb.mu.Lock()
// We do this to load tombs.
tombs = append(tombs, mb.tombs()...)
tombs = append(tombs, mb.tombsLocked()...)
purged += mb.msgs
bytes += mb.bytes
fs.removeMsgBlock(mb)
@@ -7578,6 +7893,9 @@ func (mb *msgBlock) recalculateFirstForSubj(subj string, startSeq uint64, ss *Si
continue
}
ss.First = seq
if ss.Msgs == 1 {
ss.Last = seq
}
return
}
}
@@ -7824,7 +8142,11 @@ func (fs *fileStore) setSyncTimer() {
if fs.syncTmr != nil {
fs.syncTmr.Reset(fs.fcfg.SyncInterval)
} else {
fs.syncTmr = time.AfterFunc(fs.fcfg.SyncInterval, fs.syncBlocks)
// First time this fires will be between SyncInterval/2 and SyncInterval,
// so that different stores are spread out, rather than having many of
// them trying to all sync at once, causing blips and contending dios.
start := (fs.fcfg.SyncInterval / 2) + (time.Duration(mrand.Int63n(int64(fs.fcfg.SyncInterval / 2))))
fs.syncTmr = time.AfterFunc(start, fs.syncBlocks)
}
}
@@ -7847,8 +8169,10 @@ func (fs *fileStore) flushStreamStateLoop(qch, done chan struct{}) {
defer close(done)
// Make sure we do not try to write these out too fast.
// Spread these out for large numbers on a server restart.
const writeThreshold = 2 * time.Minute
t := time.NewTicker(writeThreshold)
writeJitter := time.Duration(mrand.Int63n(int64(30 * time.Second)))
t := time.NewTicker(writeThreshold + writeJitter)
defer t.Stop()
for {
@@ -8037,7 +8361,7 @@ func (fs *fileStore) _writeFullState(force bool) error {
// Snapshot prior dirty count.
priorDirty := fs.dirty
statesEqual := trackingStatesEqual(&fs.state, &mstate) || len(fs.blks) > 0
statesEqual := trackingStatesEqual(&fs.state, &mstate)
// Release lock.
fs.mu.Unlock()
@@ -9010,14 +9334,6 @@ func (o *consumerFileStore) UpdateConfig(cfg *ConsumerConfig) error {
}
func (o *consumerFileStore) Update(state *ConsumerState) error {
o.mu.Lock()
defer o.mu.Unlock()
// Check to see if this is an outdated update.
if state.Delivered.Consumer < o.state.Delivered.Consumer || state.AckFloor.Stream < o.state.AckFloor.Stream {
return nil
}
// Sanity checks.
if state.AckFloor.Consumer > state.Delivered.Consumer {
return fmt.Errorf("bad ack floor for consumer")
@@ -9045,6 +9361,15 @@ func (o *consumerFileStore) Update(state *ConsumerState) error {
}
}
// Replace our state.
o.mu.Lock()
defer o.mu.Unlock()
// Check to see if this is an outdated update.
if state.Delivered.Consumer < o.state.Delivered.Consumer || state.AckFloor.Stream < o.state.AckFloor.Stream {
return fmt.Errorf("old update ignored")
}
o.state.Delivered = state.Delivered
o.state.AckFloor = state.AckFloor
o.state.Pending = pending
@@ -9712,14 +10037,22 @@ func (alg StoreCompression) Decompress(buf []byte) ([]byte, error) {
// writeFileWithOptionalSync writes data to name, delegating to
// writeFileWithSync (which opens with O_SYNC) when SyncAlways is set.
// The dios semaphore is handled automatically by this function, so don't
// wrap calls to it in dios.
func (fs *fileStore) writeFileWithOptionalSync(name string, data []byte, perm fs.FileMode) error {
	if fs.fcfg.SyncAlways {
		return writeFileWithSync(name, data, perm)
	}
	// Throttle concurrent disk I/O via the dios semaphore.
	<-dios
	defer func() {
		dios <- struct{}{}
	}()
	return os.WriteFile(name, data, perm)
}
func writeFileWithSync(name string, data []byte, perm fs.FileMode) error {
<-dios
defer func() {
dios <- struct{}{}
}()
flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC | os.O_SYNC
f, err := os.OpenFile(name, flags, perm)
if err != nil {
return err

View File

@@ -1900,7 +1900,7 @@ func (c *client) processGatewayAccountSub(accName string) error {
// the sublist if present.
// <Invoked from outbound connection's readLoop>
func (c *client) processGatewayRUnsub(arg []byte) error {
accName, subject, queue, err := c.parseUnsubProto(arg)
_, accName, subject, queue, err := c.parseUnsubProto(arg, true, false)
if err != nil {
return fmt.Errorf("processGatewaySubjectUnsub %s", err.Error())
}

View File

@@ -461,6 +461,8 @@ func (s *Server) enableJetStream(cfg JetStreamConfig) error {
if err := s.enableJetStreamClustering(); err != nil {
return err
}
// Set our atomic bool to clustered.
s.jsClustered.Store(true)
}
// Mark when we are up and running.
@@ -965,6 +967,8 @@ func (s *Server) shutdownJetStream() {
cc.c = nil
}
cc.meta = nil
// Set our atomic bool to false.
s.jsClustered.Store(false)
}
js.mu.Unlock()
@@ -2103,7 +2107,7 @@ func (js *jetStream) wouldExceedLimits(storeType StorageType, sz int) bool {
} else {
total, max = &js.storeUsed, js.config.MaxStore
}
return atomic.LoadInt64(total) > (max + int64(sz))
return (atomic.LoadInt64(total) + int64(sz)) > max
}
func (js *jetStream) limitsExceeded(storeType StorageType) bool {

View File

@@ -2556,7 +2556,7 @@ func (s *Server) jsLeaderServerStreamMoveRequest(sub *subscription, c *client, _
cfg.Placement = origPlacement
s.Noticef("Requested move for stream '%s > %s' R=%d from %+v to %+v",
streamName, accName, cfg.Replicas, s.peerSetToNames(currPeers), s.peerSetToNames(peers))
accName, streamName, cfg.Replicas, s.peerSetToNames(currPeers), s.peerSetToNames(peers))
// We will always have peers and therefore never do a callout, therefore it is safe to call inline
s.jsClusteredStreamUpdateRequest(&ciNew, targetAcc.(*Account), subject, reply, rmsg, &cfg, peers)
@@ -2662,7 +2662,7 @@ func (s *Server) jsLeaderServerStreamCancelMoveRequest(sub *subscription, c *cli
}
s.Noticef("Requested cancel of move: R=%d '%s > %s' to peer set %+v and restore previous peer set %+v",
cfg.Replicas, streamName, accName, s.peerSetToNames(currPeers), s.peerSetToNames(peers))
cfg.Replicas, accName, streamName, s.peerSetToNames(currPeers), s.peerSetToNames(peers))
// We will always have peers and therefore never do a callout, therefore it is safe to call inline
s.jsClusteredStreamUpdateRequest(&ciNew, targetAcc.(*Account), subject, reply, rmsg, &cfg, peers)
@@ -3557,7 +3557,7 @@ func (s *Server) processStreamRestore(ci *ClientInfo, acc *Account, cfg *StreamC
if err != nil {
resp.Error = NewJSStreamRestoreError(err, Unless(err))
s.Warnf("Restore failed for %s for stream '%s > %s' in %v",
friendlyBytes(int64(total)), streamName, acc.Name, end.Sub(start))
friendlyBytes(int64(total)), acc.Name, streamName, end.Sub(start))
} else {
resp.StreamInfo = &StreamInfo{
Created: mset.createdTime(),
@@ -3566,7 +3566,7 @@ func (s *Server) processStreamRestore(ci *ClientInfo, acc *Account, cfg *StreamC
TimeStamp: time.Now().UTC(),
}
s.Noticef("Completed restore of %s for stream '%s > %s' in %v",
friendlyBytes(int64(total)), streamName, acc.Name, end.Sub(start).Round(time.Millisecond))
friendlyBytes(int64(total)), acc.Name, streamName, end.Sub(start).Round(time.Millisecond))
}
// On the last EOF, send back the stream info or error status.

View File

@@ -134,14 +134,15 @@ type streamAssignment struct {
Config *StreamConfig `json:"stream"`
Group *raftGroup `json:"group"`
Sync string `json:"sync"`
Subject string `json:"subject"`
Reply string `json:"reply"`
Subject string `json:"subject,omitempty"`
Reply string `json:"reply,omitempty"`
Restore *StreamState `json:"restore_state,omitempty"`
// Internal
consumers map[string]*consumerAssignment
responded bool
recovering bool
err error
consumers map[string]*consumerAssignment
responded bool
recovering bool
reassigning bool // i.e. due to placement issues, lack of resources, etc.
err error
}
// consumerAssignment is what the meta controller uses to assign consumers to streams.
@@ -152,12 +153,13 @@ type consumerAssignment struct {
Stream string `json:"stream"`
Config *ConsumerConfig `json:"consumer"`
Group *raftGroup `json:"group"`
Subject string `json:"subject"`
Reply string `json:"reply"`
Subject string `json:"subject,omitempty"`
Reply string `json:"reply,omitempty"`
State *ConsumerState `json:"state,omitempty"`
// Internal
responded bool
recovering bool
pending bool
deleted bool
err error
}
@@ -222,11 +224,7 @@ func (s *Server) getJetStreamCluster() (*jetStream, *jetStreamCluster) {
}
// JetStreamIsClustered reports whether this server is running JetStream in
// clustered mode. It reads the atomic flag set when clustering is enabled
// and cleared on JetStream shutdown, avoiding a jetStream lookup and lock.
func (s *Server) JetStreamIsClustered() bool {
	return s.jsClustered.Load()
}
func (s *Server) JetStreamIsLeader() bool {
@@ -780,10 +778,17 @@ func (js *jetStream) setupMetaGroup() error {
// Setup our WAL for the metagroup.
sysAcc := s.SystemAccount()
if sysAcc == nil {
return ErrNoSysAccount
}
storeDir := filepath.Join(js.config.StoreDir, sysAcc.Name, defaultStoreDirName, defaultMetaGroupName)
js.srv.optsMu.RLock()
syncAlways := js.srv.opts.SyncAlways
syncInterval := js.srv.opts.SyncInterval
js.srv.optsMu.RUnlock()
fs, err := newFileStoreWithCreated(
FileStoreConfig{StoreDir: storeDir, BlockSize: defaultMetaFSBlkSize, AsyncFlush: false, srv: s},
FileStoreConfig{StoreDir: storeDir, BlockSize: defaultMetaFSBlkSize, AsyncFlush: false, SyncAlways: syncAlways, SyncInterval: syncInterval, srv: s},
StreamConfig{Name: defaultMetaGroupName, Storage: FileStorage},
time.Now().UTC(),
s.jsKeyGen(s.getOpts().JetStreamKey, defaultMetaGroupName),
@@ -1131,9 +1136,10 @@ func (js *jetStream) isMetaRecovering() bool {
// During recovery track any stream and consumer delete and update operations.
type recoveryUpdates struct {
removeStreams map[string]*streamAssignment
removeConsumers map[string]*consumerAssignment
removeConsumers map[string]map[string]*consumerAssignment
addStreams map[string]*streamAssignment
updateStreams map[string]*streamAssignment
updateConsumers map[string]*consumerAssignment
updateConsumers map[string]map[string]*consumerAssignment
}
// Called after recovery of the cluster on startup to check for any orphans.
@@ -1310,7 +1316,7 @@ func (js *jetStream) monitorCluster() {
isLeader bool
lastSnapTime time.Time
compactSizeMin = uint64(8 * 1024 * 1024) // 8MB
minSnapDelta = 10 * time.Second
minSnapDelta = 30 * time.Second
)
// Highwayhash key for generating hashes.
@@ -1338,9 +1344,10 @@ func (js *jetStream) monitorCluster() {
ru := &recoveryUpdates{
removeStreams: make(map[string]*streamAssignment),
removeConsumers: make(map[string]*consumerAssignment),
removeConsumers: make(map[string]map[string]*consumerAssignment),
addStreams: make(map[string]*streamAssignment),
updateStreams: make(map[string]*streamAssignment),
updateConsumers: make(map[string]*consumerAssignment),
updateConsumers: make(map[string]map[string]*consumerAssignment),
}
// Make sure to cancel any pending checkForOrphans calls if the
@@ -1351,6 +1358,8 @@ func (js *jetStream) monitorCluster() {
for {
select {
case <-s.quitCh:
// Server shutting down, but we might receive this before qch, so try to snapshot.
doSnapshot()
return
case <-rqch:
return
@@ -1364,23 +1373,31 @@ func (js *jetStream) monitorCluster() {
ces := aq.pop()
for _, ce := range ces {
if ce == nil {
// Signals we have replayed all of our metadata.
js.clearMetaRecovering()
// Process any removes that are still valid after recovery.
for _, ca := range ru.removeConsumers {
js.processConsumerRemoval(ca)
for _, cas := range ru.removeConsumers {
for _, ca := range cas {
js.processConsumerRemoval(ca)
}
}
for _, sa := range ru.removeStreams {
js.processStreamRemoval(sa)
}
// Process stream additions.
for _, sa := range ru.addStreams {
js.processStreamAssignment(sa)
}
// Process pending updates.
for _, sa := range ru.updateStreams {
js.processUpdateStreamAssignment(sa)
}
// Now consumers.
for _, ca := range ru.updateConsumers {
js.processConsumerAssignment(ca)
for _, cas := range ru.updateConsumers {
for _, ca := range cas {
js.processConsumerAssignment(ca)
}
}
// Signals we have replayed all of our metadata.
js.clearMetaRecovering()
// Clear.
ru = nil
s.Debugf("Recovered JetStream cluster metadata")
@@ -1389,12 +1406,14 @@ func (js *jetStream) monitorCluster() {
go checkHealth()
continue
}
if didSnap, didStreamRemoval, didConsumerRemoval, err := js.applyMetaEntries(ce.Entries, ru); err == nil {
_, nb := n.Applied(ce.Index)
if didSnap, didStreamRemoval, _, err := js.applyMetaEntries(ce.Entries, ru); err == nil {
var nb uint64
// Some entries can fail without an error when shutting down, don't move applied forward.
if !js.isShuttingDown() {
_, nb = n.Applied(ce.Index)
}
if js.hasPeerEntries(ce.Entries) || didStreamRemoval || (didSnap && !isLeader) {
doSnapshot()
} else if didConsumerRemoval && time.Since(lastSnapTime) > minSnapDelta/2 {
doSnapshot()
} else if nb > compactSizeMin && time.Since(lastSnapTime) > minSnapDelta {
doSnapshot()
}
@@ -1406,10 +1425,6 @@ func (js *jetStream) monitorCluster() {
aq.recycle(&ces)
case isLeader = <-lch:
// For meta layer synchronize everyone to our state on becoming leader.
if isLeader && n.ApplyQ().len() == 0 {
n.SendSnapshot(js.metaSnapshot())
}
// Process the change.
js.processLeaderChange(isLeader)
if isLeader {
@@ -1514,9 +1529,12 @@ func (js *jetStream) clusterStreamConfig(accName, streamName string) (StreamConf
}
func (js *jetStream) metaSnapshot() []byte {
start := time.Now()
js.mu.RLock()
s := js.srv
cc := js.cluster
nsa := 0
nca := 0
for _, asa := range cc.streams {
nsa += len(asa)
}
@@ -1524,7 +1542,7 @@ func (js *jetStream) metaSnapshot() []byte {
for _, asa := range cc.streams {
for _, sa := range asa {
wsa := writeableStreamAssignment{
Client: sa.Client,
Client: sa.Client.forAssignmentSnap(),
Created: sa.Created,
Config: sa.Config,
Group: sa.Group,
@@ -1532,7 +1550,17 @@ func (js *jetStream) metaSnapshot() []byte {
Consumers: make([]*consumerAssignment, 0, len(sa.consumers)),
}
for _, ca := range sa.consumers {
wsa.Consumers = append(wsa.Consumers, ca)
// Skip if the consumer is pending, we can't include it in our snapshot.
// If the proposal fails after we marked it pending, it would result in a ghost consumer.
if ca.pending {
continue
}
cca := *ca
cca.Stream = wsa.Config.Name // Needed for safe roll-backs.
cca.Client = cca.Client.forAssignmentSnap()
cca.Subject, cca.Reply = _EMPTY_, _EMPTY_
wsa.Consumers = append(wsa.Consumers, &cca)
nca++
}
streams = append(streams, wsa)
}
@@ -1543,10 +1571,23 @@ func (js *jetStream) metaSnapshot() []byte {
return nil
}
// Track how long it took to marshal the JSON
mstart := time.Now()
b, _ := json.Marshal(streams)
mend := time.Since(mstart)
js.mu.RUnlock()
return s2.EncodeBetter(nil, b)
// Track how long it took to compress the JSON
cstart := time.Now()
snap := s2.Encode(nil, b)
cend := time.Since(cstart)
if took := time.Since(start); took > time.Second {
s.rateLimitFormatWarnf("Metalayer snapshot took %.3fs (streams: %d, consumers: %d, marshal: %.3fs, s2: %.3fs, uncompressed: %d, compressed: %d)",
took.Seconds(), nsa, nca, mend.Seconds(), cend.Seconds(), len(b), len(snap))
}
return snap
}
func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecovering bool) error {
@@ -1574,6 +1615,9 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
if len(wsa.Consumers) > 0 {
sa.consumers = make(map[string]*consumerAssignment)
for _, ca := range wsa.Consumers {
if ca.Stream == _EMPTY_ {
ca.Stream = sa.Config.Name // Rehydrate from the stream name.
}
sa.consumers[ca.Name] = ca
}
}
@@ -1630,7 +1674,10 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
if isRecovering {
key := sa.recoveryKey()
ru.removeStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.updateStreams, key)
delete(ru.updateConsumers, key)
delete(ru.removeConsumers, key)
} else {
js.processStreamRemoval(sa)
}
@@ -1654,6 +1701,7 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
if isRecovering {
key := sa.recoveryKey()
ru.updateStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.removeStreams, key)
} else {
js.processUpdateStreamAssignment(sa)
@@ -1665,8 +1713,14 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
js.setConsumerAssignmentRecovering(ca)
if isRecovering {
key := ca.recoveryKey()
ru.removeConsumers[key] = ca
delete(ru.updateConsumers, key)
skey := ca.streamRecoveryKey()
if _, ok := ru.removeConsumers[skey]; !ok {
ru.removeConsumers[skey] = map[string]*consumerAssignment{}
}
ru.removeConsumers[skey][key] = ca
if consumers, ok := ru.updateConsumers[skey]; ok {
delete(consumers, key)
}
} else {
js.processConsumerRemoval(ca)
}
@@ -1675,8 +1729,14 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove
js.setConsumerAssignmentRecovering(ca)
if isRecovering {
key := ca.recoveryKey()
delete(ru.removeConsumers, key)
ru.updateConsumers[key] = ca
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
} else {
js.processConsumerAssignment(ca)
}
@@ -1889,6 +1949,13 @@ func (sa *streamAssignment) recoveryKey() string {
return sa.Client.serviceAccount() + ksep + sa.Config.Name
}
// streamRecoveryKey returns the recovery-map key for the stream this
// consumer assignment belongs to: the service account name joined to the
// stream name with ksep. A nil receiver yields the empty string.
func (ca *consumerAssignment) streamRecoveryKey() string {
	if ca == nil {
		return _EMPTY_
	}
	acct := ca.Client.serviceAccount()
	return acct + ksep + ca.Stream
}
func (ca *consumerAssignment) recoveryKey() string {
if ca == nil {
return _EMPTY_
@@ -1923,9 +1990,10 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
}
if isRecovering {
js.setStreamAssignmentRecovering(sa)
delete(ru.removeStreams, sa.recoveryKey())
}
if js.processStreamAssignment(sa) {
key := sa.recoveryKey()
ru.addStreams[key] = sa
delete(ru.removeStreams, key)
} else if js.processStreamAssignment(sa) {
didRemoveStream = true
}
case removeStreamOp:
@@ -1938,7 +2006,10 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
js.setStreamAssignmentRecovering(sa)
key := sa.recoveryKey()
ru.removeStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.updateStreams, key)
delete(ru.updateConsumers, key)
delete(ru.removeConsumers, key)
} else {
js.processStreamRemoval(sa)
didRemoveStream = true
@@ -1952,8 +2023,14 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
if isRecovering {
js.setConsumerAssignmentRecovering(ca)
key := ca.recoveryKey()
delete(ru.removeConsumers, key)
ru.updateConsumers[key] = ca
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
} else {
js.processConsumerAssignment(ca)
}
@@ -1966,8 +2043,14 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
if isRecovering {
js.setConsumerAssignmentRecovering(ca)
key := ca.recoveryKey()
delete(ru.removeConsumers, key)
ru.updateConsumers[key] = ca
skey := ca.streamRecoveryKey()
if consumers, ok := ru.removeConsumers[skey]; ok {
delete(consumers, key)
}
if _, ok := ru.updateConsumers[skey]; !ok {
ru.updateConsumers[skey] = map[string]*consumerAssignment{}
}
ru.updateConsumers[skey][key] = ca
} else {
js.processConsumerAssignment(ca)
}
@@ -1980,8 +2063,14 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
if isRecovering {
js.setConsumerAssignmentRecovering(ca)
key := ca.recoveryKey()
ru.removeConsumers[key] = ca
delete(ru.updateConsumers, key)
skey := ca.streamRecoveryKey()
if _, ok := ru.removeConsumers[skey]; !ok {
ru.removeConsumers[skey] = map[string]*consumerAssignment{}
}
ru.removeConsumers[skey][key] = ca
if consumers, ok := ru.updateConsumers[skey]; ok {
delete(consumers, key)
}
} else {
js.processConsumerRemoval(ca)
didRemoveConsumer = true
@@ -1996,6 +2085,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo
js.setStreamAssignmentRecovering(sa)
key := sa.recoveryKey()
ru.updateStreams[key] = sa
delete(ru.addStreams, key)
delete(ru.removeStreams, key)
} else {
js.processUpdateStreamAssignment(sa)
@@ -2053,8 +2143,32 @@ func (js *jetStream) createRaftGroup(accName string, rg *raftGroup, storage Stor
}
// Check if we already have this assigned.
retry:
if node := s.lookupRaftNode(rg.Name); node != nil {
if node.State() == Closed {
// We're waiting for this node to finish shutting down before we replace it.
js.mu.Unlock()
node.WaitForStop()
js.mu.Lock()
goto retry
}
s.Debugf("JetStream cluster already has raft group %q assigned", rg.Name)
// Check and see if the group has the same peers. If not then we
// will update the known peers, which will send a peerstate if leader.
groupPeerIDs := append([]string{}, rg.Peers...)
var samePeers bool
if nodePeers := node.Peers(); len(rg.Peers) == len(nodePeers) {
nodePeerIDs := make([]string, 0, len(nodePeers))
for _, n := range nodePeers {
nodePeerIDs = append(nodePeerIDs, n.ID)
}
slices.Sort(groupPeerIDs)
slices.Sort(nodePeerIDs)
samePeers = slices.Equal(groupPeerIDs, nodePeerIDs)
}
if !samePeers {
node.UpdateKnownPeers(groupPeerIDs)
}
rg.node = node
js.mu.Unlock()
return nil
@@ -2082,8 +2196,13 @@ func (js *jetStream) createRaftGroup(accName string, rg *raftGroup, storage Stor
storeDir := filepath.Join(js.config.StoreDir, sysAcc.Name, defaultStoreDirName, rg.Name)
var store StreamStore
if storage == FileStorage {
// If the server is set to sync always, do the same for the Raft log.
js.srv.optsMu.RLock()
syncAlways := js.srv.opts.SyncAlways
syncInterval := js.srv.opts.SyncInterval
js.srv.optsMu.RUnlock()
fs, err := newFileStoreWithCreated(
FileStoreConfig{StoreDir: storeDir, BlockSize: defaultMediumBlockSize, AsyncFlush: false, SyncInterval: 5 * time.Minute, srv: s},
FileStoreConfig{StoreDir: storeDir, BlockSize: defaultMediumBlockSize, AsyncFlush: false, SyncAlways: syncAlways, SyncInterval: syncInterval, srv: s},
StreamConfig{Name: rg.Name, Storage: FileStorage, Metadata: labels},
time.Now().UTC(),
s.jsKeyGen(s.getOpts().JetStreamKey, rg.Name),
@@ -2324,7 +2443,6 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps
// fully recovered from disk.
isRecovering := true
// Should only be called from the leader.
doSnapshot := func() {
if mset == nil || isRecovering || isRestore || time.Since(lastSnapTime) < minSnapDelta {
return
@@ -2834,7 +2952,7 @@ func (mset *stream) resetClusteredState(err error) bool {
// If we detect we are shutting down just return.
if js != nil && js.isShuttingDown() {
s.Debugf("Will not reset stream, jetstream shutting down")
s.Debugf("Will not reset stream, JetStream shutting down")
return false
}
@@ -3835,6 +3953,14 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme
// This is an error condition.
if err != nil {
// If we're shutting down we could get a variety of errors, for example:
// 'JetStream not enabled for account' when looking up the stream.
// Normally we can continue and delete state, but need to be careful when shutting down.
if js.isShuttingDown() {
s.Debugf("Could not create stream, JetStream shutting down")
return
}
if IsNatsErr(err, JSStreamStoreFailedF) {
s.Warnf("Stream create failed for '%s > %s': %v", sa.Client.serviceAccount(), sa.Config.Name, err)
err = errStreamStoreFailed
@@ -4129,8 +4255,10 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
return
}
js.mu.Lock()
sa := js.streamAssignment(accName, stream)
if sa == nil {
js.mu.Unlock()
s.Debugf("Consumer create failed, could not locate stream '%s > %s'", accName, stream)
return
}
@@ -4142,7 +4270,6 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
var wasExisting bool
// Check if we have an existing consumer assignment.
js.mu.Lock()
if sa.consumers == nil {
sa.consumers = make(map[string]*consumerAssignment)
} else if oca := sa.consumers[ca.Name]; oca != nil {
@@ -4163,6 +4290,7 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) {
// Place into our internal map under the stream assignment.
// Ok to replace an existing one, we check on process call below.
sa.consumers[ca.Name] = ca
ca.pending = false
js.mu.Unlock()
acc, err := s.LookupAccount(accName)
@@ -4426,6 +4554,13 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
}
if err != nil {
// If we're shutting down we could get a variety of errors.
// Normally we can continue and delete state, but need to be careful when shutting down.
if js.isShuttingDown() {
s.Debugf("Could not create consumer, JetStream shutting down")
return
}
if IsNatsErr(err, JSConsumerStoreFailedErrF) {
s.Warnf("Consumer create failed for '%s > %s > %s': %v", ca.Client.serviceAccount(), ca.Stream, ca.Name, err)
err = errConsumerStoreFailed
@@ -4821,7 +4956,11 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) {
doSnapshot(true)
}
} else if err := js.applyConsumerEntries(o, ce, isLeader); err == nil {
ne, nb := n.Applied(ce.Index)
var ne, nb uint64
// We can't guarantee writes are flushed while we're shutting down. Just rely on replay during recovery.
if !js.isShuttingDown() {
ne, nb = n.Applied(ce.Index)
}
ce.ReturnToPool()
// If we have at least min entries to compact, go ahead and snapshot/compact.
if nb > 0 && ne >= compactNumMin || nb > compactSizeMin {
@@ -4838,23 +4977,13 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) {
}
// Process the change.
if err := js.processConsumerLeaderChange(o, isLeader); err == nil && isLeader {
if err := js.processConsumerLeaderChange(o, isLeader); err == nil {
// Check our state if we are under an interest based stream.
if mset := o.getStream(); mset != nil {
var ss StreamState
mset.store.FastState(&ss)
o.checkStateForInterestStream(&ss)
}
// Do a snapshot.
doSnapshot(true)
// Synchronize followers to our state. Only send out if we have state and nothing pending.
if n != nil {
if _, _, applied := n.Progress(); applied > 0 && aq.len() == 0 {
if snap, err := o.store.EncodedState(); err == nil {
n.SendSnapshot(snap)
}
}
}
}
// We may receive a leader change after the consumer assignment which would cancel us
@@ -4962,6 +5091,7 @@ func (js *jetStream) applyConsumerEntries(o *consumer, ce *CommittedEntry, isLea
}
panic(err.Error())
}
if err = o.store.Update(state); err != nil {
o.mu.RLock()
s, acc, mset, name := o.srv, o.acc, o.mset, o.name
@@ -4974,17 +5104,10 @@ func (js *jetStream) applyConsumerEntries(o *consumer, ce *CommittedEntry, isLea
if mset := o.getStream(); mset != nil {
var ss StreamState
mset.store.FastState(&ss)
if err := o.checkStateForInterestStream(&ss); err == errAckFloorHigherThanLastSeq {
// Register pre-acks unless there is no state at all for the stream and we would create a lot of pre-acks.
mset.mu.Lock()
// Only register if we have a valid FirstSeq.
if ss.FirstSeq > 0 {
for seq := ss.FirstSeq; seq < state.AckFloor.Stream; seq++ {
mset.registerPreAck(o, seq)
}
}
mset.mu.Unlock()
}
// We used to register preacks here if our ack floor was higher than the last sequence.
// Now when streams catch up they properly call checkInterestState() and periodically run this as well.
// If our states drift this could have allocated lots of pre-acks.
o.checkStateForInterestStream(&ss)
}
}
@@ -5015,25 +5138,22 @@ func (js *jetStream) applyConsumerEntries(o *consumer, ce *CommittedEntry, isLea
buf := e.Data
switch entryOp(buf[0]) {
case updateDeliveredOp:
// These are handled in place in leaders.
if !isLeader {
dseq, sseq, dc, ts, err := decodeDeliveredUpdate(buf[1:])
if err != nil {
if mset, node := o.streamAndNode(); mset != nil && node != nil {
s := js.srv
s.Errorf("JetStream cluster could not decode consumer delivered update for '%s > %s > %s' [%s]",
mset.account(), mset.name(), o, node.Group())
}
panic(err.Error())
}
// Make sure to update delivered under the lock.
o.mu.Lock()
err = o.store.UpdateDelivered(dseq, sseq, dc, ts)
o.ldt = time.Now()
o.mu.Unlock()
if err != nil {
panic(err.Error())
dseq, sseq, dc, ts, err := decodeDeliveredUpdate(buf[1:])
if err != nil {
if mset, node := o.streamAndNode(); mset != nil && node != nil {
s := js.srv
s.Errorf("JetStream cluster could not decode consumer delivered update for '%s > %s > %s' [%s]",
mset.account(), mset.name(), o, node.Group())
}
panic(err.Error())
}
// Make sure to update delivered under the lock.
o.mu.Lock()
err = o.store.UpdateDelivered(dseq, sseq, dc, ts)
o.ldt = time.Now()
o.mu.Unlock()
if err != nil {
panic(err.Error())
}
case updateAcksOp:
dseq, sseq, err := decodeAckUpdate(buf[1:])
@@ -5359,8 +5479,7 @@ func (js *jetStream) processStreamAssignmentResults(sub *subscription, c *client
// then we will do the proper thing. Otherwise will be a no-op.
cc.removeInflightProposal(result.Account, result.Stream)
// FIXME(dlc) - suppress duplicates?
if sa := js.streamAssignment(result.Account, result.Stream); sa != nil {
if sa := js.streamAssignment(result.Account, result.Stream); sa != nil && !sa.reassigning {
canDelete := !result.Update && time.Since(sa.Created) < 5*time.Second
// See if we should retry in case this cluster is full but there are others.
@@ -5386,6 +5505,10 @@ func (js *jetStream) processStreamAssignmentResults(sub *subscription, c *client
// Propose new.
sa.Group, sa.err = rg, nil
cc.meta.Propose(encodeAddStreamAssignment(sa))
// When the new stream assignment is processed, sa.reassigning will be
// automatically set back to false. Until then, don't process any more
// assignment results.
sa.reassigning = true
return
}
}
@@ -6185,6 +6308,10 @@ func sysRequest[T any](s *Server, subjFormat string, args ...any) (*T, error) {
isubj := fmt.Sprintf(subjFormat, args...)
s.mu.Lock()
if s.sys == nil {
s.mu.Unlock()
return nil, ErrNoSysAccount
}
inbox := s.newRespInbox()
results := make(chan *T, 1)
s.sys.replies[inbox] = func(_ *subscription, _ *client, _ *Account, _, _ string, msg []byte) {
@@ -7532,14 +7659,15 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec
ca = nca
}
// Mark this as pending.
if sa.consumers == nil {
sa.consumers = make(map[string]*consumerAssignment)
}
sa.consumers[ca.Name] = ca
// Do formal proposal.
cc.meta.Propose(encodeAddConsumerAssignment(ca))
if err := cc.meta.Propose(encodeAddConsumerAssignment(ca)); err == nil {
// Mark this as pending.
if sa.consumers == nil {
sa.consumers = make(map[string]*consumerAssignment)
}
ca.pending = true
sa.consumers[ca.Name] = ca
}
}
func encodeAddConsumerAssignment(ca *consumerAssignment) []byte {
@@ -7655,54 +7783,46 @@ const compressThreshold = 8192 // 8k
// If allowed and contents over the threshold we will compress.
func encodeStreamMsgAllowCompress(subject, reply string, hdr, msg []byte, lseq uint64, ts int64, compressOK bool) []byte {
shouldCompress := compressOK && len(subject)+len(reply)+len(hdr)+len(msg) > compressThreshold
// Clip the subject, reply, header and msgs down. Operate on
// uint64 lengths to avoid overflowing.
slen := min(uint64(len(subject)), math.MaxUint16)
rlen := min(uint64(len(reply)), math.MaxUint16)
hlen := min(uint64(len(hdr)), math.MaxUint16)
mlen := min(uint64(len(msg)), math.MaxUint32)
total := slen + rlen + hlen + mlen
elen := 1 + 8 + 8 + len(subject) + len(reply) + len(hdr) + len(msg)
shouldCompress := compressOK && total > compressThreshold
elen := int(1 + 8 + 8 + total)
elen += (2 + 2 + 2 + 4) // Encoded lengths, 4bytes
// TODO(dlc) - check sizes of subject, reply and hdr, make sure uint16 ok.
buf := make([]byte, elen)
buf := make([]byte, 1, elen)
buf[0] = byte(streamMsgOp)
var le = binary.LittleEndian
wi := 1
le.PutUint64(buf[wi:], lseq)
wi += 8
le.PutUint64(buf[wi:], uint64(ts))
wi += 8
le.PutUint16(buf[wi:], uint16(len(subject)))
wi += 2
copy(buf[wi:], subject)
wi += len(subject)
le.PutUint16(buf[wi:], uint16(len(reply)))
wi += 2
copy(buf[wi:], reply)
wi += len(reply)
le.PutUint16(buf[wi:], uint16(len(hdr)))
wi += 2
if len(hdr) > 0 {
copy(buf[wi:], hdr)
wi += len(hdr)
}
le.PutUint32(buf[wi:], uint32(len(msg)))
wi += 4
if len(msg) > 0 {
copy(buf[wi:], msg)
wi += len(msg)
}
buf = le.AppendUint64(buf, lseq)
buf = le.AppendUint64(buf, uint64(ts))
buf = le.AppendUint16(buf, uint16(slen))
buf = append(buf, subject[:slen]...)
buf = le.AppendUint16(buf, uint16(rlen))
buf = append(buf, reply[:rlen]...)
buf = le.AppendUint16(buf, uint16(hlen))
buf = append(buf, hdr[:hlen]...)
buf = le.AppendUint32(buf, uint32(mlen))
buf = append(buf, msg[:mlen]...)
// Check if we should compress.
if shouldCompress {
nbuf := make([]byte, s2.MaxEncodedLen(elen))
nbuf[0] = byte(compressedStreamMsgOp)
ebuf := s2.Encode(nbuf[1:], buf[1:wi])
// Only pay cost of decode the other side if we compressed.
ebuf := s2.Encode(nbuf[1:], buf[1:])
// Only pay the cost of decode on the other side if we compressed.
// S2 will allow us to try without major penalty for non-compressable data.
if len(ebuf) < wi {
nbuf = nbuf[:len(ebuf)+1]
buf, wi = nbuf, len(nbuf)
if len(ebuf) < len(buf) {
buf = nbuf[:len(ebuf)+1]
}
}
return buf[:wi]
return buf
}
// Determine if all peers in our set support the binary snapshot.
@@ -7865,7 +7985,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
// Check msgSize if we have a limit set there. Again this works if it goes through but better to be pre-emptive.
if maxMsgSize >= 0 && (len(hdr)+len(msg)) > maxMsgSize {
err := fmt.Errorf("JetStream message size exceeds limits for '%s > %s'", jsa.acc().Name, mset.cfg.Name)
s.RateLimitWarnf(err.Error())
s.RateLimitWarnf("%s", err.Error())
if canRespond {
var resp = &JSPubAckResponse{PubAck: &PubAck{Stream: name}}
resp.Error = NewJSStreamMessageExceedsMaximumError()
@@ -7882,7 +8002,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
// Again this works if it goes through but better to be pre-emptive.
if len(hdr) > math.MaxUint16 {
err := fmt.Errorf("JetStream header size exceeds limits for '%s > %s'", jsa.acc().Name, mset.cfg.Name)
s.RateLimitWarnf(err.Error())
s.RateLimitWarnf("%s", err.Error())
if canRespond {
var resp = &JSPubAckResponse{PubAck: &PubAck{Stream: name}}
resp.Error = NewJSStreamHeaderExceedsMaximumError()
@@ -8014,7 +8134,7 @@ func (mset *stream) processClusteredInboundMsg(subject, reply string, hdr, msg [
// TODO(dlc) - Make this a limit where we drop messages to protect ourselves, but allow to be configured.
if mset.clseq-(lseq+mset.clfs) > streamLagWarnThreshold {
lerr := fmt.Errorf("JetStream stream '%s > %s' has high message lag", jsa.acc().Name, name)
s.RateLimitWarnf(lerr.Error())
s.RateLimitWarnf("%s", lerr.Error())
}
mset.clMu.Unlock()
@@ -8290,7 +8410,16 @@ RETRY:
releaseSyncOutSem()
if n.GroupLeader() == _EMPTY_ {
return fmt.Errorf("%w for stream '%s > %s'", errCatchupAbortedNoLeader, mset.account(), mset.name())
// Prevent us from spinning if we've installed a snapshot from a leader but there's no leader online.
// We wait a bit to check if a leader has come online in the meantime, if so we can continue.
var canContinue bool
if numRetries == 0 {
time.Sleep(startInterval)
canContinue = n.GroupLeader() != _EMPTY_
}
if !canContinue {
return fmt.Errorf("%w for stream '%s > %s'", errCatchupAbortedNoLeader, mset.account(), mset.name())
}
}
// If we have a sub clear that here.
@@ -8873,17 +9002,6 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
// mset.store never changes after being set, don't need lock.
mset.store.FastState(&state)
// Reset notion of first if this request wants sequences before our starting sequence
// and we would have nothing to send. If we have partial messages still need to send skips for those.
// We will keep sreq's first sequence to not create sequence mismatches on the follower, but we extend the last to our current state.
if sreq.FirstSeq < state.FirstSeq && state.FirstSeq > sreq.LastSeq {
s.Debugf("Catchup for stream '%s > %s' resetting request first sequence from %d to %d",
mset.account(), mset.name(), sreq.FirstSeq, state.FirstSeq)
if state.LastSeq > sreq.LastSeq {
sreq.LastSeq = state.LastSeq
}
}
// Setup sequences to walk through.
seq, last := sreq.FirstSeq, sreq.LastSeq
mset.setCatchupPeer(sreq.Peer, last-seq)
@@ -8972,20 +9090,26 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
for ; seq <= last && atomic.LoadInt64(&outb) <= maxOutBytes && atomic.LoadInt32(&outm) <= maxOutMsgs && s.gcbBelowMax(); seq++ {
var sm *StoreMsg
var err error
// Is we should use load next do so here.
// If we should use load next do so here.
if useLoadNext {
var nseq uint64
sm, nseq, err = mset.store.LoadNextMsg(fwcs, true, seq, &smv)
if err == nil && nseq > seq {
// If we jumped over the requested last sequence, clamp it down.
// Otherwise, we would send too much to the follower.
if nseq > last {
nseq = last
sm = nil
}
dr.First, dr.Num = seq, nseq-seq
// Jump ahead
seq = nseq
} else if err == ErrStoreEOF {
dr.First, dr.Num = seq, state.LastSeq-seq
dr.First, dr.Num = seq, last-seq
// Clear EOF here for normal processing.
err = nil
// Jump ahead
seq = state.LastSeq
seq = last
}
} else {
sm, err = mset.store.LoadMsg(seq, &smv)
@@ -9047,25 +9171,10 @@ func (mset *stream) runCatchup(sendSubject string, sreq *streamSyncRequest) {
if drOk && dr.First > 0 {
sendDR()
}
// Check for a condition where our state's first is now past the last that we could have sent.
// If so reset last and continue sending.
var state StreamState
mset.mu.RLock()
mset.store.FastState(&state)
mset.mu.RUnlock()
if last < state.FirstSeq {
last = state.LastSeq
}
// Recheck our exit condition.
if seq == last {
if drOk && dr.First > 0 {
sendDR()
}
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
// EOF
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, nil)
return false
}
s.Noticef("Catchup for stream '%s > %s' complete", mset.account(), mset.name())
// EOF
s.sendInternalMsgLocked(sendSubject, _EMPTY_, nil, nil)
return false
}
select {
case <-remoteQuitCh:

View File

@@ -774,7 +774,7 @@ func (s *Server) startLeafNodeAcceptLoop() {
}
// RegEx to match a creds file with user JWT and Seed.
var credsRe = regexp.MustCompile(`\s*(?:(?:[-]{3,}[^\n]*[-]{3,}\n)(.+)(?:\n\s*[-]{3,}[^\n]*[-]{3,}\n))`)
var credsRe = regexp.MustCompile(`\s*(?:(?:[-]{3,}.*[-]{3,}\r?\n)([\w\-.=]+)(?:\r?\n[-]{3,}.*[-]{3,}(\r?\n|\z)))`)
// clusterName is provided as argument to avoid lock ordering issues with the locked client c
// Lock should be held entering here.
@@ -2271,6 +2271,42 @@ func keyFromSub(sub *subscription) string {
return sb.String()
}
const (
keyRoutedSub = "R"
keyRoutedSubByte = 'R'
keyRoutedLeafSub = "L"
keyRoutedLeafSubByte = 'L'
)
// Helper function to build the key that prevents collisions between normal
// routed subscriptions and routed subscriptions on behalf of a leafnode.
// Keys will look like this:
// "R foo" -> plain routed sub on "foo"
// "R foo bar" -> queue routed sub on "foo", queue "bar"
// "L foo bar" -> plain routed leaf sub on "foo", leaf "bar"
// "L foo bar baz" -> queue routed sub on "foo", queue "bar", leaf "baz"
func keyFromSubWithOrigin(sub *subscription) string {
var sb strings.Builder
sb.Grow(2 + len(sub.origin) + 1 + len(sub.subject) + 1 + len(sub.queue))
leaf := len(sub.origin) > 0
if leaf {
sb.WriteByte(keyRoutedLeafSubByte)
} else {
sb.WriteByte(keyRoutedSubByte)
}
sb.WriteByte(' ')
sb.Write(sub.subject)
if sub.queue != nil {
sb.WriteByte(' ')
sb.Write(sub.queue)
}
if leaf {
sb.WriteByte(' ')
sb.Write(sub.origin)
}
return sb.String()
}
// Lock should be held.
func (c *client) writeLeafSub(w *bytes.Buffer, key string, n int32) {
if key == _EMPTY_ {
@@ -2321,12 +2357,21 @@ func (c *client) processLeafSub(argo []byte) (err error) {
args := splitArg(arg)
sub := &subscription{client: c}
delta := int32(1)
switch len(args) {
case 1:
sub.queue = nil
case 3:
sub.queue = args[1]
sub.qw = int32(parseSize(args[2]))
// TODO: (ik) We should have a non empty queue name and a queue
// weight >= 1. For 2.11, we may want to return an error if that
// is not the case, but for now just overwrite `delta` if queue
// weight is greater than 1 (it is possible after a reconnect/
// server restart to receive a queue weight > 1 for a new sub).
if sub.qw > 1 {
delta = sub.qw
}
default:
return fmt.Errorf("processLeafSub Parse Error: '%s'", arg)
}
@@ -2391,7 +2436,6 @@ func (c *client) processLeafSub(argo []byte) (err error) {
key := bytesToString(sub.sid)
osub := c.subs[key]
updateGWs := false
delta := int32(1)
if osub == nil {
c.subs[key] = sub
// Now place into the account sl.
@@ -2472,6 +2516,10 @@ func (c *client) processLeafUnsub(arg []byte) error {
// We store local subs by account and subject and optionally queue name.
// LS- will have the arg exactly as the key.
sub, ok := c.subs[string(arg)]
delta := int32(1)
if ok && len(sub.queue) > 0 {
delta = sub.qw
}
c.mu.Unlock()
if ok {
@@ -2481,14 +2529,14 @@ func (c *client) processLeafUnsub(arg []byte) error {
if !spoke {
// If we are routing subtract from the route map for the associated account.
srv.updateRouteSubscriptionMap(acc, sub, -1)
srv.updateRouteSubscriptionMap(acc, sub, -delta)
// Gateways
if updateGWs {
srv.gatewayUpdateSubInterest(acc.Name, sub, -1)
srv.gatewayUpdateSubInterest(acc.Name, sub, -delta)
}
}
// Now check on leafnode updates for other leaf nodes.
acc.updateLeafNodes(sub, -1)
acc.updateLeafNodes(sub, -delta)
return nil
}

View File

@@ -84,10 +84,13 @@ func (ms *memStore) UpdateConfig(cfg *StreamConfig) error {
ms.ageChk = nil
}
// Make sure to update MaxMsgsPer
if cfg.MaxMsgsPer < -1 {
cfg.MaxMsgsPer = -1
}
maxp := ms.maxp
ms.maxp = cfg.MaxMsgsPer
// If the value is smaller we need to enforce that.
if ms.maxp != 0 && ms.maxp < maxp {
// If the value is smaller, or was unset before, we need to enforce that.
if ms.maxp > 0 && (maxp == 0 || ms.maxp < maxp) {
lm := uint64(ms.maxp)
ms.fss.Iter(func(subj []byte, ss *SimpleState) bool {
if ss.Msgs > lm {
@@ -359,15 +362,13 @@ func (ms *memStore) FilteredState(sseq uint64, subj string) SimpleState {
}
func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubject bool) SimpleState {
var ss SimpleState
if sseq < ms.state.FirstSeq {
sseq = ms.state.FirstSeq
}
// If past the end no results.
if sseq > ms.state.LastSeq {
return ss
return SimpleState{}
}
if filter == _EMPTY_ {
@@ -391,9 +392,10 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
_tsa, _fsa := [32]string{}, [32]string{}
tsa, fsa := _tsa[:0], _fsa[:0]
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
wc := subjectHasWildcard(filter)
if wc {
fsa = tokenizeSubjectIntoSlice(fsa[:0], filter)
}
// 1. See if we match any subs from fss.
// 2. If we match and the sseq is past ss.Last then we can use meta only.
// 3. If we match we need to do a partial, break and clear any totals and do a full scan like num pending.
@@ -409,6 +411,7 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
return isSubsetMatchTokenized(tsa, fsa)
}
var ss SimpleState
update := func(fss *SimpleState) {
msgs, first, last := fss.Msgs, fss.First, fss.Last
if lastPerSubject {
@@ -424,6 +427,7 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
}
var havePartial bool
var totalSkipped uint64
// We will track start and end sequences as we go.
ms.fss.Match(stringToBytes(filter), func(subj []byte, fss *SimpleState) {
if fss.firstNeedsUpdate {
@@ -436,6 +440,8 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
havePartial = true
// Don't break here, we will update to keep tracking last.
update(fss)
} else {
totalSkipped += fss.Msgs
}
})
@@ -492,6 +498,7 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
} else {
// We will adjust from the totals above by scanning what we need to exclude.
ss.First = first
ss.Msgs += totalSkipped
var adjust uint64
var tss *SimpleState
@@ -563,8 +570,9 @@ func (ms *memStore) filteredStateLocked(sseq uint64, filter string, lastPerSubje
// SubjectsState returns a map of SimpleState for all matching subjects.
func (ms *memStore) SubjectsState(subject string) map[string]SimpleState {
ms.mu.RLock()
defer ms.mu.RUnlock()
// This needs to be a write lock, as we can mutate the per-subject state.
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.fss.Size() == 0 {
return nil
@@ -630,6 +638,154 @@ func (ms *memStore) NumPending(sseq uint64, filter string, lastPerSubject bool)
return ss.Msgs, ms.state.LastSeq
}
// NumPending will return the number of pending messages matching any subject in the sublist starting at sequence.
func (ms *memStore) NumPendingMulti(sseq uint64, sl *Sublist, lastPerSubject bool) (total, validThrough uint64) {
if sl == nil {
return ms.NumPending(sseq, fwcs, lastPerSubject)
}
// This needs to be a write lock, as we can mutate the per-subject state.
ms.mu.Lock()
defer ms.mu.Unlock()
var ss SimpleState
if sseq < ms.state.FirstSeq {
sseq = ms.state.FirstSeq
}
// If past the end no results.
if sseq > ms.state.LastSeq {
return 0, ms.state.LastSeq
}
update := func(fss *SimpleState) {
msgs, first, last := fss.Msgs, fss.First, fss.Last
if lastPerSubject {
msgs, first = 1, last
}
ss.Msgs += msgs
if ss.First == 0 || first < ss.First {
ss.First = first
}
if last > ss.Last {
ss.Last = last
}
}
var havePartial bool
var totalSkipped uint64
// We will track start and end sequences as we go.
IntersectStree[SimpleState](ms.fss, sl, func(subj []byte, fss *SimpleState) {
if fss.firstNeedsUpdate {
ms.recalculateFirstForSubj(bytesToString(subj), fss.First, fss)
}
if sseq <= fss.First {
update(fss)
} else if sseq <= fss.Last {
// We matched but it is a partial.
havePartial = true
// Don't break here, we will update to keep tracking last.
update(fss)
} else {
totalSkipped += fss.Msgs
}
})
// If we did not encounter any partials we can return here.
if !havePartial {
return ss.Msgs, ms.state.LastSeq
}
// If we are here we need to scan the msgs.
// Capture first and last sequences for scan and then clear what we had.
first, last := ss.First, ss.Last
// To track if we decide to exclude we need to calculate first.
if first < sseq {
first = sseq
}
// Now we want to check if it is better to scan inclusive and recalculate that way
// or leave and scan exclusive and adjust our totals.
// ss.Last is always correct here.
toScan, toExclude := last-first, first-ms.state.FirstSeq+ms.state.LastSeq-ss.Last
var seen map[string]bool
if lastPerSubject {
seen = make(map[string]bool)
}
if toScan < toExclude {
ss.Msgs, ss.First = 0, 0
update := func(sm *StoreMsg) {
ss.Msgs++
if ss.First == 0 {
ss.First = sm.seq
}
if seen != nil {
seen[sm.subj] = true
}
}
// Check if easier to just scan msgs vs the sequence range.
// This can happen with lots of interior deletes.
if last-first > uint64(len(ms.msgs)) {
for _, sm := range ms.msgs {
if sm.seq >= first && sm.seq <= last && !seen[sm.subj] && sl.HasInterest(sm.subj) {
update(sm)
}
}
} else {
for seq := first; seq <= last; seq++ {
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && sl.HasInterest(sm.subj) {
update(sm)
}
}
}
} else {
// We will adjust from the totals above by scanning what we need to exclude.
ss.First = first
ss.Msgs += totalSkipped
var adjust uint64
var tss *SimpleState
update := func(sm *StoreMsg) {
if lastPerSubject {
tss, _ = ms.fss.Find(stringToBytes(sm.subj))
}
// If we are last per subject, make sure to only adjust if all messages are before our first.
if tss == nil || tss.Last < first {
adjust++
}
if seen != nil {
seen[sm.subj] = true
}
}
// Check if easier to just scan msgs vs the sequence range.
if first-ms.state.FirstSeq > uint64(len(ms.msgs)) {
for _, sm := range ms.msgs {
if sm.seq < first && !seen[sm.subj] && sl.HasInterest(sm.subj) {
update(sm)
}
}
} else {
for seq := ms.state.FirstSeq; seq < first; seq++ {
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && sl.HasInterest(sm.subj) {
update(sm)
}
}
}
// Now do range at end.
for seq := last + 1; seq < ms.state.LastSeq; seq++ {
if sm, ok := ms.msgs[seq]; ok && !seen[sm.subj] && sl.HasInterest(sm.subj) {
adjust++
if seen != nil {
seen[sm.subj] = true
}
}
}
ss.Msgs -= adjust
}
return ss.Msgs, ms.state.LastSeq
}
// Will check the msg limit for this tracked subject.
// Lock should be held.
func (ms *memStore) enforcePerSubjectLimit(subj string, ss *SimpleState) {
@@ -875,7 +1031,9 @@ func (ms *memStore) Compact(seq uint64) (uint64, error) {
ms.state.FirstSeq = seq
ms.state.FirstTime = time.Time{}
ms.state.LastSeq = seq - 1
// Reset msgs and fss.
ms.msgs = make(map[uint64]*StoreMsg)
ms.fss = stree.NewSubjectTree[SimpleState]()
}
ms.mu.Unlock()
@@ -1225,6 +1383,9 @@ func (ms *memStore) recalculateFirstForSubj(subj string, startSeq uint64, ss *Si
for ; tseq <= ss.Last; tseq++ {
if sm := ms.msgs[tseq]; sm != nil && sm.subj == subj {
ss.First = tseq
if ss.Msgs == 1 {
ss.Last = tseq
}
ss.firstNeedsUpdate = false
return
}
@@ -1488,8 +1649,6 @@ func (o *consumerMemStore) Update(state *ConsumerState) error {
pending = make(map[uint64]*Pending, len(state.Pending))
for seq, p := range state.Pending {
pending[seq] = &Pending{p.Sequence, p.Timestamp}
}
for seq := range pending {
if seq <= state.AckFloor.Stream || seq > state.Delivered.Stream {
return fmt.Errorf("bad pending entry, sequence [%d] out of range", seq)
}
@@ -1504,10 +1663,10 @@ func (o *consumerMemStore) Update(state *ConsumerState) error {
// Replace our state.
o.mu.Lock()
defer o.mu.Unlock()
// Check to see if this is an outdated update.
if state.Delivered.Consumer < o.state.Delivered.Consumer {
o.mu.Unlock()
if state.Delivered.Consumer < o.state.Delivered.Consumer || state.AckFloor.Stream < o.state.AckFloor.Stream {
return fmt.Errorf("old update ignored")
}
@@ -1515,7 +1674,6 @@ func (o *consumerMemStore) Update(state *ConsumerState) error {
o.state.AckFloor = state.AckFloor
o.state.Pending = pending
o.state.Redelivered = redelivered
o.mu.Unlock()
return nil
}

View File

@@ -3228,10 +3228,11 @@ func (s *Server) HandleHealthz(w http.ResponseWriter, r *http.Request) {
Details: includeDetails,
})
code := http.StatusOK
code := hs.StatusCode
if hs.Error != _EMPTY_ {
s.Warnf("Healthcheck failed: %q", hs.Error)
code = hs.StatusCode
} else if len(hs.Errors) != 0 {
s.Warnf("Healthcheck failed: %d errors", len(hs.Errors))
}
// Remove StatusCode from JSON representation when responding via HTTP
// since this is already in the response.

View File

@@ -1,4 +1,4 @@
// Copyright 2012-2023 The NATS Authors
// Copyright 2012-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -657,26 +657,28 @@ type authorization struct {
// TLSConfigOpts holds the parsed tls config information,
// used with flag parsing
type TLSConfigOpts struct {
CertFile string
KeyFile string
CaFile string
Verify bool
Insecure bool
Map bool
TLSCheckKnownURLs bool
HandshakeFirst bool // Indicate that the TLS handshake should occur first, before sending the INFO protocol.
FallbackDelay time.Duration // Where supported, indicates how long to wait for the handshake before falling back to sending the INFO protocol first.
Timeout float64
RateLimit int64
Ciphers []uint16
CurvePreferences []tls.CurveID
PinnedCerts PinnedCertSet
CertStore certstore.StoreType
CertMatchBy certstore.MatchByType
CertMatch string
OCSPPeerConfig *certidp.OCSPPeerConfig
Certificates []*TLSCertPairOpt
MinVersion uint16
CertFile string
KeyFile string
CaFile string
Verify bool
Insecure bool
Map bool
TLSCheckKnownURLs bool
HandshakeFirst bool // Indicate that the TLS handshake should occur first, before sending the INFO protocol.
FallbackDelay time.Duration // Where supported, indicates how long to wait for the handshake before falling back to sending the INFO protocol first.
Timeout float64
RateLimit int64
Ciphers []uint16
CurvePreferences []tls.CurveID
PinnedCerts PinnedCertSet
CertStore certstore.StoreType
CertMatchBy certstore.MatchByType
CertMatch string
CertMatchSkipInvalid bool
CaCertsMatch []string
OCSPPeerConfig *certidp.OCSPPeerConfig
Certificates []*TLSCertPairOpt
MinVersion uint16
}
// TLSCertPairOpt are the paths to a certificate and private key.
@@ -4419,6 +4421,28 @@ func parseTLS(v any, isClientCtx bool) (t *TLSConfigOpts, retErr error) {
return nil, &configErr{tk, certstore.ErrBadCertMatchField.Error()}
}
tc.CertMatch = certMatch
case "ca_certs_match":
rv := []string{}
switch mv := mv.(type) {
case string:
rv = append(rv, mv)
case []string:
rv = append(rv, mv...)
case []interface{}:
for _, t := range mv {
if token, ok := t.(token); ok {
if ts, ok := token.Value().(string); ok {
rv = append(rv, ts)
continue
} else {
return nil, &configErr{tk, fmt.Sprintf("error parsing ca_cert_match: unsupported type %T where string is expected", token)}
}
} else {
return nil, &configErr{tk, fmt.Sprintf("error parsing ca_cert_match: unsupported type %T", t)}
}
}
}
tc.CaCertsMatch = rv
case "handshake_first", "first", "immediate":
switch mv := mv.(type) {
case bool:
@@ -4444,6 +4468,12 @@ func parseTLS(v any, isClientCtx bool) (t *TLSConfigOpts, retErr error) {
default:
return nil, &configErr{tk, fmt.Sprintf("field %q should be a boolean or a string, got %T", mk, mv)}
}
case "cert_match_skip_invalid":
certMatchSkipInvalid, ok := mv.(bool)
if !ok {
return nil, &configErr{tk, certstore.ErrBadCertMatchSkipInvalidField.Error()}
}
tc.CertMatchSkipInvalid = certMatchSkipInvalid
case "ocsp_peer":
switch vv := mv.(type) {
case bool:
@@ -4819,7 +4849,7 @@ func GenTLSConfig(tc *TLSConfigOpts) (*tls.Config, error) {
}
config.Certificates = []tls.Certificate{cert}
case tc.CertStore != certstore.STOREEMPTY:
err := certstore.TLSConfig(tc.CertStore, tc.CertMatchBy, tc.CertMatch, &config)
err := certstore.TLSConfig(tc.CertStore, tc.CertMatchBy, tc.CertMatch, tc.CaCertsMatch, tc.CertMatchSkipInvalid, &config)
if err != nil {
return nil, err
}

View File

@@ -788,7 +788,8 @@ func (c *client) parse(buf []byte) error {
c.traceInOp("LS-", arg)
}
}
err = c.processRemoteUnsub(arg)
leafUnsub := c.op == 'L' || c.op == 'l'
err = c.processRemoteUnsub(arg, leafUnsub)
case GATEWAY:
if trace {
c.traceInOp("RS-", arg)

View File

File diff suppressed because it is too large Load Diff

View File

@@ -2172,15 +2172,22 @@ func (s *Server) reloadClusterPermissions(oldPerms *RoutePermissions) {
}
deleteRoutedSubs = deleteRoutedSubs[:0]
route.mu.Lock()
pa, _, hasSubType := route.getRoutedSubKeyInfo()
for key, sub := range route.subs {
if an := strings.Fields(key)[0]; an != accName {
continue
// If this is not a pinned-account route, we need to get the
// account name from the key to see if we collect this sub.
if !pa {
if an := getAccNameFromRoutedSubKey(sub, key, hasSubType); an != accName {
continue
}
}
// If we can't export, we need to drop the subscriptions that
// we have on behalf of this route.
// Need to make a string cast here since canExport call sl.Match()
subj := string(sub.subject)
if !route.canExport(subj) {
delete(route.subs, string(sub.sid))
// We can use bytesToString() here.
delete(route.subs, bytesToString(sub.sid))
deleteRoutedSubs = append(deleteRoutedSubs, sub)
}
}

View File

@@ -74,6 +74,7 @@ type route struct {
didSolicit bool
retry bool
lnoc bool
lnocu bool
routeType RouteType
url *url.URL
authRequired bool
@@ -112,6 +113,7 @@ type connectInfo struct {
Cluster string `json:"cluster"`
Dynamic bool `json:"cluster_dynamic,omitempty"`
LNOC bool `json:"lnoc,omitempty"`
LNOCU bool `json:"lnocu,omitempty"` // Support for LS- with origin cluster name
Gateway string `json:"gateway,omitempty"`
}
@@ -767,6 +769,7 @@ func (c *client) processRouteInfo(info *Info) {
c.route.gatewayURL = info.GatewayURL
c.route.remoteName = info.Name
c.route.lnoc = info.LNOC
c.route.lnocu = info.LNOCU
c.route.jetstream = info.JetStream
// When sent through route INFO, if the field is set, it should be of size 1.
@@ -1169,6 +1172,36 @@ type asubs struct {
subs []*subscription
}
// Returns the account name from the subscription's key.
// This is invoked knowing that the key contains an account name, so for a sub
// that is not from a pinned-account route.
// The `keyHasSubType` boolean indicates that the key starts with the indicator
// for leaf or regular routed subscriptions.
func getAccNameFromRoutedSubKey(sub *subscription, key string, keyHasSubType bool) string {
var accIdx int
if keyHasSubType {
// Start after the sub type indicator.
accIdx = 1
// But if there is an origin, bump its index.
if len(sub.origin) > 0 {
accIdx = 2
}
}
return strings.Fields(key)[accIdx]
}
// Returns if the route is dedicated to an account, its name, and a boolean
// that indicates if this route uses the routed subscription indicator at
// the beginning of the subscription key.
// Lock held on entry.
func (c *client) getRoutedSubKeyInfo() (bool, string, bool) {
var accName string
if an := c.route.accName; len(an) > 0 {
accName = string(an)
}
return accName != _EMPTY_, accName, c.route.lnocu
}
// removeRemoteSubs will walk the subs and remove them from the appropriate account.
func (c *client) removeRemoteSubs() {
// We need to gather these on a per account basis.
@@ -1178,14 +1211,18 @@ func (c *client) removeRemoteSubs() {
srv := c.srv
subs := c.subs
c.subs = nil
pa, accountName, hasSubType := c.getRoutedSubKeyInfo()
c.mu.Unlock()
for key, sub := range subs {
c.mu.Lock()
sub.max = 0
c.mu.Unlock()
// Grab the account
accountName := strings.Fields(key)[0]
// If not a pinned-account route, we need to find the account
// name from the sub's key.
if !pa {
accountName = getAccNameFromRoutedSubKey(sub, key, hasSubType)
}
ase := as[accountName]
if ase == nil {
if v, ok := srv.accounts.Load(accountName); ok {
@@ -1197,10 +1234,14 @@ func (c *client) removeRemoteSubs() {
} else {
ase.subs = append(ase.subs, sub)
}
if srv.gateway.enabled {
srv.gatewayUpdateSubInterest(accountName, sub, -1)
delta := int32(1)
if len(sub.queue) > 0 {
delta = sub.qw
}
ase.acc.updateLeafNodes(sub, -1)
if srv.gateway.enabled {
srv.gatewayUpdateSubInterest(accountName, sub, -delta)
}
ase.acc.updateLeafNodes(sub, -delta)
}
// Now remove the subs by batch for each account sublist.
@@ -1217,8 +1258,9 @@ func (c *client) removeRemoteSubs() {
// Lock is held on entry
func (c *client) removeRemoteSubsForAcc(name string) []*subscription {
var subs []*subscription
_, _, hasSubType := c.getRoutedSubKeyInfo()
for key, sub := range c.subs {
an := strings.Fields(key)[0]
an := getAccNameFromRoutedSubKey(sub, key, hasSubType)
if an == name {
sub.max = 0
subs = append(subs, sub)
@@ -1228,46 +1270,69 @@ func (c *client) removeRemoteSubsForAcc(name string) []*subscription {
return subs
}
func (c *client) parseUnsubProto(arg []byte) (string, []byte, []byte, error) {
func (c *client) parseUnsubProto(arg []byte, accInProto, hasOrigin bool) ([]byte, string, []byte, []byte, error) {
// Indicate any activity, so pub and sub or unsubs.
c.in.subs++
args := splitArg(arg)
var queue []byte
var accountName string
subjIdx := 1
c.mu.Lock()
if c.kind == ROUTER && c.route != nil {
if accountName = string(c.route.accName); accountName != _EMPTY_ {
subjIdx = 0
}
var (
origin []byte
accountName string
queue []byte
subjIdx int
)
// If `hasOrigin` is true, then it means this is a LS- with origin in proto.
if hasOrigin {
// We would not be here if there was not at least 1 field.
origin = args[0]
subjIdx = 1
}
// If there is an account in the protocol, bump the subject index.
if accInProto {
subjIdx++
}
c.mu.Unlock()
switch len(args) {
case subjIdx + 1:
case subjIdx + 2:
queue = args[subjIdx+1]
default:
return _EMPTY_, nil, nil, fmt.Errorf("parse error: '%s'", arg)
return nil, _EMPTY_, nil, nil, fmt.Errorf("parse error: '%s'", arg)
}
if accountName == _EMPTY_ {
accountName = string(args[0])
if accInProto {
// If there is an account in the protocol, it is before the subject.
accountName = string(args[subjIdx-1])
}
return accountName, args[subjIdx], queue, nil
return origin, accountName, args[subjIdx], queue, nil
}
// Indicates no more interest in the given account/subject for the remote side.
func (c *client) processRemoteUnsub(arg []byte) (err error) {
func (c *client) processRemoteUnsub(arg []byte, leafUnsub bool) (err error) {
srv := c.srv
if srv == nil {
return nil
}
accountName, subject, _, err := c.parseUnsubProto(arg)
var accountName string
// Assume the account will be in the protocol.
accInProto := true
c.mu.Lock()
originSupport := c.route.lnocu
if c.route != nil && len(c.route.accName) > 0 {
accountName, accInProto = string(c.route.accName), false
}
c.mu.Unlock()
hasOrigin := leafUnsub && originSupport
_, accNameFromProto, subject, _, err := c.parseUnsubProto(arg, accInProto, hasOrigin)
if err != nil {
return fmt.Errorf("processRemoteUnsub %s", err.Error())
}
if accInProto {
accountName = accNameFromProto
}
// Lookup the account
var acc *Account
if v, ok := srv.accounts.Load(accountName); ok {
@@ -1284,28 +1349,43 @@ func (c *client) processRemoteUnsub(arg []byte) (err error) {
}
updateGWs := false
// We store local subs by account and subject and optionally queue name.
// RS- will have the arg exactly as the key.
_keya := [128]byte{}
_key := _keya[:0]
var key string
if c.kind == ROUTER && c.route != nil && len(c.route.accName) > 0 {
key = accountName + " " + bytesToString(arg)
} else {
if !originSupport {
// If it is an LS- or RS-, we use the protocol as-is as the key.
key = bytesToString(arg)
} else {
// We need to prefix with the sub type.
if leafUnsub {
_key = append(_key, keyRoutedLeafSubByte)
} else {
_key = append(_key, keyRoutedSubByte)
}
_key = append(_key, ' ')
_key = append(_key, arg...)
key = bytesToString(_key)
}
delta := int32(1)
sub, ok := c.subs[key]
if ok {
delete(c.subs, key)
acc.sl.Remove(sub)
updateGWs = srv.gateway.enabled
if len(sub.queue) > 0 {
delta = sub.qw
}
}
c.mu.Unlock()
if updateGWs {
srv.gatewayUpdateSubInterest(accountName, sub, -1)
srv.gatewayUpdateSubInterest(accountName, sub, -delta)
}
// Now check on leafnode updates.
acc.updateLeafNodes(sub, -1)
acc.updateLeafNodes(sub, -delta)
if c.opts.Verbose {
c.sendOK()
@@ -1322,35 +1402,78 @@ func (c *client) processRemoteSub(argo []byte, hasOrigin bool) (err error) {
return nil
}
// Copy so we do not reference a potentially large buffer
arg := make([]byte, len(argo))
copy(arg, argo)
args := splitArg(arg)
sub := &subscription{client: c}
// This value indicates the mandatory subject offset in the args
// slice. It varies based on the optional presence of origin or account name
// fields (the latter would not be present for "per-account" routes).
var subjIdx int
// If account is present, this is its "char" position in arg slice.
var accPos int
if hasOrigin {
// Set to 1, will be adjusted if the account is also expected.
subjIdx = 1
sub.origin = args[0]
// The account would start after the origin and trailing space.
accPos = len(sub.origin) + 1
}
// We copy `argo` to not reference the read buffer. However, we will
// prefix with a code that says if the remote sub is for a leaf
// (hasOrigin == true) or not to prevent key collisions. Imagine:
// "RS+ foo bar baz 1\r\n" => "foo bar baz" (a routed queue sub)
// "LS+ foo bar baz\r\n" => "foo bar baz" (a route leaf sub on "baz",
// for account "bar" with origin "foo").
//
// The sub.sid/key will be set respectively to "R foo bar baz" and
// "L foo bar baz".
//
// We also no longer add the account if it was not present (due to
// pinned-account route) since there is no need really.
//
// For routes to older server, we will still create the "arg" with
// the above layout, but we will create the sub.sid/key as before,
// that is, not including the origin for LS+ because older server
// only send LS- without origin, so we would not be able to find
// the sub in the map.
c.mu.Lock()
accountName := string(c.route.accName)
oldStyle := !c.route.lnocu
c.mu.Unlock()
// If the route is dedicated to an account, accountName will not
// be empty. If it is, then the account must be in the protocol.
var accInProto bool
if accountName == _EMPTY_ {
// Indicate if the account name should be in the protocol. It would be the
// case if accountName is empty.
accInProto := accountName == _EMPTY_
// Copy so we do not reference a potentially large buffer.
// Add 2 more bytes for the routed sub type.
arg := make([]byte, 0, 2+len(argo))
if hasOrigin {
arg = append(arg, keyRoutedLeafSubByte)
} else {
arg = append(arg, keyRoutedSubByte)
}
arg = append(arg, ' ')
arg = append(arg, argo...)
// Now split to get all fields. Unroll splitArgs to avoid runtime/heap issues.
a := [MAX_RSUB_ARGS][]byte{}
args := a[:0]
start := -1
for i, b := range arg {
switch b {
case ' ', '\t', '\r', '\n':
if start >= 0 {
args = append(args, arg[start:i])
start = -1
}
default:
if start < 0 {
start = i
}
}
}
if start >= 0 {
args = append(args, arg[start:])
}
delta := int32(1)
sub := &subscription{client: c}
// There will always be at least a subject, but its location will depend
// on if there is an origin, an account name, etc.. Since we know that
// we have added the sub type indicator as the first field, the subject
// position will be at minimum at index 1.
subjIdx := 1
if hasOrigin {
subjIdx++
}
if accInProto {
subjIdx++
accInProto = true
}
switch len(args) {
case subjIdx + 1:
@@ -1358,15 +1481,50 @@ func (c *client) processRemoteSub(argo []byte, hasOrigin bool) (err error) {
case subjIdx + 3:
sub.queue = args[subjIdx+1]
sub.qw = int32(parseSize(args[subjIdx+2]))
// TODO: (ik) We should have a non empty queue name and a queue
// weight >= 1. For 2.11, we may want to return an error if that
// is not the case, but for now just overwrite `delta` if queue
// weight is greater than 1 (it is possible after a reconnect/
// server restart to receive a queue weight > 1 for a new sub).
if sub.qw > 1 {
delta = sub.qw
}
default:
return fmt.Errorf("processRemoteSub Parse Error: '%s'", arg)
}
// We know that the number of fields is correct. So we can access args[] based
// on where we expect the fields to be.
// If there is an origin, it will be at index 1.
if hasOrigin {
sub.origin = args[1]
}
// For subject, use subjIdx.
sub.subject = args[subjIdx]
// If the account name is empty (not a "per-account" route), the account
// is at the index prior to the subject.
if accountName == _EMPTY_ {
// If the account name is in the protocol, it will be before the subject.
if accInProto {
accountName = bytesToString(args[subjIdx-1])
}
// Now set the sub.sid from the arg slice. However, we will have a different
// one if we use the origin or not.
start = 0
end := len(arg)
if sub.queue != nil {
// Remove the ' <weight>' from the arg length.
end -= 1 + len(args[subjIdx+2])
}
if oldStyle {
// We will start at the account (if present) or at the subject.
// We first skip the "R " or "L "
start = 2
// And if there is an origin skip that.
if hasOrigin {
start += len(sub.origin) + 1
}
// Here we are pointing at the account (if present), or at the subject.
}
sub.sid = arg[start:end]
// Lookup account while avoiding fetch.
// A slow fetch delays subsequent remote messages. It also avoids the expired check (see below).
// With all but memory resolver lookup can be delayed or fail.
@@ -1424,33 +1582,6 @@ func (c *client) processRemoteSub(argo []byte, hasOrigin bool) (err error) {
return nil
}
// We store local subs by account and subject and optionally queue name.
// If we have a queue it will have a trailing weight which we do not want.
if sub.queue != nil {
// if the account is in the protocol, we can reference directly "arg",
// otherwise, we need to allocate/construct the sid.
if accInProto {
sub.sid = arg[accPos : accPos+len(accountName)+1+len(sub.subject)+1+len(sub.queue)]
} else {
// It is unfortunate that we have to do this, but the gain of not
// having the account name in message protocols outweighs the
// penalty of having to do this here for the processing of a
// subscription.
sub.sid = append(sub.sid, accountName...)
sub.sid = append(sub.sid, ' ')
sub.sid = append(sub.sid, sub.subject...)
sub.sid = append(sub.sid, ' ')
sub.sid = append(sub.sid, sub.queue...)
}
} else if accInProto {
sub.sid = arg[accPos:]
} else {
sub.sid = append(sub.sid, accountName...)
sub.sid = append(sub.sid, ' ')
sub.sid = append(sub.sid, sub.subject...)
}
key := bytesToString(sub.sid)
acc.mu.RLock()
// For routes (this can be called by leafnodes), check if the account is
// transitioning (from pool to dedicated route) and this route is not a
@@ -1465,9 +1596,11 @@ func (c *client) processRemoteSub(argo []byte, hasOrigin bool) (err error) {
}
sl := acc.sl
acc.mu.RUnlock()
// We use the sub.sid for the key of the c.subs map.
key := bytesToString(sub.sid)
osub := c.subs[key]
updateGWs := false
delta := int32(1)
if osub == nil {
c.subs[key] = sub
// Now place into the account sl.
@@ -1509,10 +1642,14 @@ func (c *client) addRouteSubOrUnsubProtoToBuf(buf []byte, accName string, sub *s
if isSubProto {
buf = append(buf, lSubBytes...)
buf = append(buf, sub.origin...)
buf = append(buf, ' ')
} else {
buf = append(buf, lUnsubBytes...)
if c.route.lnocu {
buf = append(buf, sub.origin...)
buf = append(buf, ' ')
}
}
buf = append(buf, ' ')
} else {
if isSubProto {
buf = append(buf, rSubBytes...)
@@ -1613,18 +1750,27 @@ func (s *Server) sendSubsToRoute(route *client, idx int, account string) {
for _, a := range accs {
a.mu.RLock()
for key, n := range a.rm {
var subj, qn []byte
s := strings.Split(key, " ")
subj = []byte(s[0])
if len(s) > 1 {
qn = []byte(s[1])
var origin, qn []byte
s := strings.Fields(key)
// Subject will always be the second field (index 1).
subj := stringToBytes(s[1])
// Check if the key is for a leaf (will be field 0).
forLeaf := s[0] == keyRoutedLeafSub
// For queue, if not for a leaf, we need 3 fields "R foo bar",
// but if for a leaf, we need 4 fields "L foo bar leaf_origin".
if l := len(s); (!forLeaf && l == 3) || (forLeaf && l == 4) {
qn = stringToBytes(s[2])
}
// s[0] is the subject and already as a string, so use that
if forLeaf {
// The leaf origin will be the last field.
origin = stringToBytes(s[len(s)-1])
}
// s[1] is the subject and already as a string, so use that
// instead of converting back `subj` to a string.
if !route.canImport(s[0]) {
if !route.canImport(s[1]) {
continue
}
sub := subscription{subject: subj, queue: qn, qw: n}
sub := subscription{origin: origin, subject: subj, queue: qn, qw: n}
buf = route.addRouteSubOrUnsubProtoToBuf(buf, a.Name, &sub, true)
}
a.mu.RUnlock()
@@ -2286,8 +2432,9 @@ func (s *Server) updateRouteSubscriptionMap(acc *Account, sub *subscription, del
return
}
// Create the fast key which will use the subject or 'subject<spc>queue' for queue subscribers.
key := keyFromSub(sub)
// Create the subscription key which will prevent collisions between regular
// and leaf routed subscriptions. See keyFromSubWithOrigin() for details.
key := keyFromSubWithOrigin(sub)
// Decide whether we need to send an update out to all the routes.
update := isq
@@ -2481,6 +2628,7 @@ func (s *Server) startRouteAcceptLoop() {
Domain: s.info.Domain,
Dynamic: s.isClusterNameDynamic(),
LNOC: true,
LNOCU: true,
}
// For tests that want to simulate old servers, do not set the compression
// on the INFO protocol if configured with CompressionNotSupported.
@@ -2795,6 +2943,7 @@ func (c *client) processRouteConnect(srv *Server, arg []byte, lang string) error
c.mu.Lock()
c.route.remoteID = c.opts.Name
c.route.lnoc = proto.LNOC
c.route.lnocu = proto.LNOCU
c.setRoutePermissions(perms)
c.headers = supportsHeaders && proto.Headers
c.mu.Unlock()

View File

@@ -56,6 +56,8 @@ func (sq *sendq) internalLoop() {
rply [256]byte
szb [10]byte
hdb [10]byte
_msg [4096]byte
msg = _msg[:0]
)
for s.isRunning() {
@@ -73,16 +75,18 @@ func (sq *sendq) internalLoop() {
} else {
c.pa.reply = nil
}
var msg []byte
msg = msg[:0]
if len(pm.hdr) > 0 {
c.pa.hdr = len(pm.hdr)
c.pa.hdb = append(hdb[:0], strconv.Itoa(c.pa.hdr)...)
msg = append(pm.hdr, pm.msg...)
msg = append(msg, pm.hdr...)
msg = append(msg, pm.msg...)
msg = append(msg, _CRLF_...)
} else {
c.pa.hdr = -1
c.pa.hdb = nil
msg = append(pm.msg, _CRLF_...)
msg = append(msg, pm.msg...)
msg = append(msg, _CRLF_...)
}
c.processInboundClientMsg(msg)
c.pa.szb = nil
@@ -107,16 +111,7 @@ func (sq *sendq) send(subj, rply string, hdr, msg []byte) {
}
out := outMsgPool.Get().(*outMsg)
out.subj, out.rply = subj, rply
out.hdr, out.msg = nil, nil
// We will copy these for now.
if len(hdr) > 0 {
hdr = copyBytes(hdr)
out.hdr = hdr
}
if len(msg) > 0 {
msg = copyBytes(msg)
out.msg = msg
}
out.hdr = append(out.hdr[:0], hdr...)
out.msg = append(out.msg[:0], msg...)
sq.q.push(out)
}

View File

@@ -94,6 +94,7 @@ type Info struct {
Import *SubjectPermission `json:"import,omitempty"`
Export *SubjectPermission `json:"export,omitempty"`
LNOC bool `json:"lnoc,omitempty"`
LNOCU bool `json:"lnocu,omitempty"`
InfoOnConnect bool `json:"info_on_connect,omitempty"` // When true the server will respond to CONNECT with an INFO
ConnectInfo bool `json:"connect_info,omitempty"` // When true this is the server INFO response to CONNECT
RoutePoolSize int `json:"route_pool_size,omitempty"`
@@ -140,8 +141,10 @@ type Server struct {
listenerErr error
gacc *Account
sys *internal
sysAcc atomic.Pointer[Account]
js atomic.Pointer[jetStream]
isMetaLeader atomic.Bool
jsClustered atomic.Bool
accounts sync.Map
tmpAccounts sync.Map // Temporarily stores accounts that are being built
activeAccounts int32
@@ -1280,6 +1283,7 @@ func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error)
if err == nil && s.sys != nil && acc != s.sys.account {
// sys.account.clients (including internal client)/respmap/etc... are transferred separately
s.sys.account = acc
s.sysAcc.Store(acc)
}
if err != nil {
return awcsti, fmt.Errorf("error resolving system account: %v", err)
@@ -1635,13 +1639,7 @@ func (s *Server) SetSystemAccount(accName string) error {
// SystemAccount returns the system account if set.
func (s *Server) SystemAccount() *Account {
var sacc *Account
s.mu.RLock()
if s.sys != nil {
sacc = s.sys.account
}
s.mu.RUnlock()
return sacc
return s.sysAcc.Load()
}
// GlobalAccount returns the global account.
@@ -1713,6 +1711,9 @@ func (s *Server) setSystemAccount(acc *Account) error {
s.sys.wg.Add(1)
s.mu.Unlock()
// Store in atomic for fast lookup.
s.sysAcc.Store(acc)
// Register with the account.
s.sys.client.registerWithAccount(acc)

View File

@@ -101,6 +101,7 @@ type StreamStore interface {
SubjectsState(filterSubject string) map[string]SimpleState
SubjectsTotals(filterSubject string) map[string]uint64
NumPending(sseq uint64, filter string, lastPerSubject bool) (total, validThrough uint64)
NumPendingMulti(sseq uint64, sl *Sublist, lastPerSubject bool) (total, validThrough uint64)
State() StreamState
FastState(*StreamState)
EncodedStreamState(failed uint64) (enc []byte, err error)
@@ -291,12 +292,16 @@ type DeleteRange struct {
}
func (dr *DeleteRange) State() (first, last, num uint64) {
return dr.First, dr.First + dr.Num, dr.Num
deletesAfterFirst := dr.Num
if deletesAfterFirst > 0 {
deletesAfterFirst--
}
return dr.First, dr.First + deletesAfterFirst, dr.Num
}
// Range will range over all the deleted sequences represented by this block.
func (dr *DeleteRange) Range(f func(uint64) bool) {
for seq := dr.First; seq <= dr.First+dr.Num; seq++ {
for seq := dr.First; seq < dr.First+dr.Num; seq++ {
if !f(seq) {
return
}

View File

@@ -1580,8 +1580,8 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfi
// Config returns the stream's configuration.
func (mset *stream) config() StreamConfig {
mset.mu.RLock()
defer mset.mu.RUnlock()
mset.cfgMu.RLock()
defer mset.cfgMu.RUnlock()
return mset.cfg
}
@@ -3536,7 +3536,6 @@ func (mset *stream) resetSourceInfo() {
}
}
// Lock should be held.
// This will do a reverse scan on startup or leader election
// searching for the starting sequence number.
// This can be slow in degenerative cases.
@@ -3575,6 +3574,15 @@ func (mset *stream) startingSequenceForSources() {
}
}()
update := func(iName string, seq uint64) {
// Only update active in case we have older ones in here that got configured out.
if si := mset.sources[iName]; si != nil {
if _, ok := seqs[iName]; !ok {
seqs[iName] = seq
}
}
}
var smv StoreMsg
for seq := state.LastSeq; seq >= state.FirstSeq; seq-- {
sm, err := mset.store.LoadMsg(seq, &smv)
@@ -3586,15 +3594,6 @@ func (mset *stream) startingSequenceForSources() {
continue
}
var update = func(iName string, seq uint64) {
// Only update active in case we have older ones in here that got configured out.
if si := mset.sources[iName]; si != nil {
if _, ok := seqs[iName]; !ok {
seqs[iName] = seq
}
}
}
streamName, iName, sseq := streamAndSeq(string(ss))
if iName == _EMPTY_ { // Pre-2.10 message header means it's a match for any source using that stream name
for _, ssi := range mset.cfg.Sources {
@@ -3676,12 +3675,17 @@ func (mset *stream) subscribeToStream() error {
} else if len(mset.cfg.Sources) > 0 && mset.sourcesConsumerSetup == nil {
// Setup the initial source infos for the sources
mset.resetSourceInfo()
// Delay the actual source consumer(s) creation(s) for after a delay
mset.sourcesConsumerSetup = time.AfterFunc(time.Duration(rand.Intn(int(500*time.Millisecond)))+100*time.Millisecond, func() {
mset.mu.Lock()
// Delay the actual source consumer(s) creation(s) for after a delay if a replicated stream.
// If it's an R1, this is done at startup and we will do inline.
if mset.cfg.Replicas == 1 {
mset.setupSourceConsumers()
mset.mu.Unlock()
})
} else {
mset.sourcesConsumerSetup = time.AfterFunc(time.Duration(rand.Intn(int(500*time.Millisecond)))+100*time.Millisecond, func() {
mset.mu.Lock()
mset.setupSourceConsumers()
mset.mu.Unlock()
})
}
}
// Check for direct get access.
// We spin up followers for clustered streams in monitorStream().
@@ -4676,11 +4680,14 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte,
// Check for republish.
if republish {
const ht = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\n\r\n"
const htho = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\nNats-Msg-Size: %d\r\n\r\n"
// When adding to existing headers, will use the fmt.Append version so this skips the headers from above.
const hoff = 10
tsStr := time.Unix(0, ts).UTC().Format(time.RFC3339Nano)
var rpMsg []byte
if len(hdr) == 0 {
const ht = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\n\r\n"
const htho = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\nNats-Msg-Size: %d\r\n\r\n"
if !thdrsOnly {
hdr = fmt.Appendf(nil, ht, name, subject, seq, tsStr, tlseq)
rpMsg = copyBytes(msg)
@@ -4688,19 +4695,16 @@ func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte,
hdr = fmt.Appendf(nil, htho, name, subject, seq, tsStr, tlseq, len(msg))
}
} else {
// Slow path.
hdr = genHeader(hdr, JSStream, name)
hdr = genHeader(hdr, JSSubject, subject)
hdr = genHeader(hdr, JSSequence, strconv.FormatUint(seq, 10))
hdr = genHeader(hdr, JSTimeStamp, tsStr)
hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(tlseq, 10))
// use hdr[:end:end] to make sure as we add we copy the original hdr.
end := len(hdr) - LEN_CR_LF
if !thdrsOnly {
hdr = fmt.Appendf(hdr[:end:end], ht[hoff:], name, subject, seq, tsStr, tlseq)
rpMsg = copyBytes(msg)
} else {
hdr = genHeader(hdr, JSMsgSize, strconv.Itoa(len(msg)))
hdr = fmt.Appendf(hdr[:end:end], htho[hoff:], name, subject, seq, tsStr, tlseq, len(msg))
}
}
mset.outq.send(newJSPubMsg(tsubj, _EMPTY_, _EMPTY_, copyBytes(hdr), rpMsg, nil, seq))
mset.outq.send(newJSPubMsg(tsubj, _EMPTY_, _EMPTY_, hdr, rpMsg, nil, seq))
}
// Send response here.
@@ -4819,6 +4823,9 @@ func newJSPubMsg(dsubj, subj, reply string, hdr, msg []byte, o *consumer, seq ui
if pm != nil {
m = pm.(*jsPubMsg)
buf = m.buf[:0]
if hdr != nil {
hdr = append(m.hdr[:0], hdr...)
}
} else {
m = new(jsPubMsg)
}
@@ -4847,6 +4854,9 @@ func (pm *jsPubMsg) returnToPool() {
if len(pm.buf) > 0 {
pm.buf = pm.buf[:0]
}
if len(pm.hdr) > 0 {
pm.hdr = pm.hdr[:0]
}
jsPubMsgPool.Put(pm)
}
@@ -5178,8 +5188,6 @@ func (mset *stream) stop(deleteFlag, advisory bool) error {
n.Delete()
sa = mset.sa
} else {
// Always attempt snapshot on clean exit.
n.InstallSnapshot(mset.stateSnapshotLocked())
n.Stop()
}
}

View File

@@ -50,6 +50,7 @@ func (t *SubjectTree[T]) dump(w io.Writer, n node, depth int) {
// For individual node/leaf dumps.
func (n *leaf[T]) kind() string { return "LEAF" }
func (n *node4) kind() string { return "NODE4" }
func (n *node10) kind() string { return "NODE10" }
func (n *node16) kind() string { return "NODE16" }
func (n *node48) kind() string { return "NODE48" }
func (n *node256) kind() string { return "NODE256" }

View File

@@ -0,0 +1,106 @@
// Copyright 2023-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stree
// Node with 10 children
// This node size is for the particular case that a part of the subject is numeric
// in nature, i.e. it only needs to satisfy the range 0-9 without wasting bytes
// Order of struct fields for best memory alignment (as per govet/fieldalignment)
type node10 struct {
	child [10]node // child nodes; only the first `size` entries are in use
	meta           // embedded common node state (prefix bytes and size)
	key [10]byte   // one key byte per child, parallel to child; kept unsorted
}
// newNode10 allocates an empty node10 and seeds it with the given prefix.
func newNode10(prefix []byte) *node10 {
	n := new(node10)
	n.setPrefix(prefix)
	return n
}
// addChild registers nn under key byte c. Children are kept unsorted and
// without traversal bitfields, so a new entry simply takes the next free slot.
// TODO(dlc) - We should revisit here with more detailed benchmarks.
func (n *node10) addChild(c byte, nn node) {
	if n.size >= 10 {
		panic("node10 full!")
	}
	slot := n.size
	n.key[slot] = c
	n.child[slot] = nn
	n.size++
}
// findChild returns a pointer to the child registered under byte c, or nil
// when no such child exists. Entries are unsorted, so this is a linear scan
// over the live slots.
func (n *node10) findChild(c byte) *node {
	for i, k := range n.key[:n.size] {
		if k == c {
			return &n.child[i]
		}
	}
	return nil
}
// isFull reports whether all 10 child slots are occupied.
func (n *node10) isFull() bool { return n.size >= 10 }
// grow promotes this node to a node16, copying all 10 key/child slots across.
func (n *node10) grow() node {
	n16 := newNode16(n.prefix)
	for i, c := range n.key {
		n16.addChild(c, n.child[i])
	}
	return n16
}
// deleteChild removes the child keyed by c, if present.
// Because entries are unsorted, the last live entry is swapped into the freed
// slot instead of shifting the tail; the vacated slot is zeroed/nil'ed so no
// stale reference keeps the removed child reachable.
func (n *node10) deleteChild(c byte) {
	for i, last := uint16(0), n.size-1; i < n.size; i++ {
		if n.key[i] == c {
			// Unsorted so just swap in last one here, else nil if last.
			if i < last {
				n.key[i] = n.key[last]
				n.child[i] = n.child[last]
				n.key[last] = 0
				n.child[last] = nil
			} else {
				n.key[i] = 0
				n.child[i] = nil
			}
			n.size--
			return
		}
	}
}
// shrink demotes this node to a node4 once at most 4 children remain.
// Returns nil when the node is still too large to shrink.
func (n *node10) shrink() node {
	if n.size > 4 {
		return nil
	}
	n4 := newNode4(nil)
	for i, c := range n.key[:n.size] {
		n4.addChild(c, n.child[i])
	}
	return n4
}
// iter invokes f on each live child in slot order, stopping early when f
// returns false.
func (n *node10) iter(f func(node) bool) {
	for _, child := range n.child[:n.size] {
		if !f(child) {
			return
		}
	}
}
// Return the live children as a slice; note it is backed by the node's own
// array, so callers must not retain it past mutations of this node.
func (n *node10) children() []node {
	return n.child[:n.size]
}

View File

@@ -79,10 +79,10 @@ func (n *node16) deleteChild(c byte) {
// Shrink if needed and return new node, otherwise return nil.
func (n *node16) shrink() node {
if n.size > 4 {
if n.size > 10 {
return nil
}
nn := newNode4(nil)
nn := newNode10(nil)
for i := uint16(0); i < n.size; i++ {
nn.addChild(n.key[i], n.child[i])
}

View File

@@ -49,7 +49,7 @@ func (n *node4) findChild(c byte) *node {
func (n *node4) isFull() bool { return n.size >= 4 }
func (n *node4) grow() node {
nn := newNode16(n.prefix)
nn := newNode10(n.prefix)
for i := 0; i < 4; i++ {
nn.addChild(n.key[i], n.child[i])
}

View File

@@ -283,7 +283,7 @@ func (t *SubjectTree[T]) delete(np *node, subject []byte, si int) (*T, bool) {
func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subject []byte, val *T)) {
// Capture if we are sitting on a terminal fwc.
var hasFWC bool
if lp := len(parts); lp > 0 && parts[lp-1][0] == fwc {
if lp := len(parts); lp > 0 && len(parts[lp-1]) > 0 && parts[lp-1][0] == fwc {
hasFWC = true
}

View File

@@ -20,6 +20,8 @@ import (
"sync"
"sync/atomic"
"unicode/utf8"
"github.com/nats-io/nats-server/v2/server/stree"
)
// Sublist is a routing mechanism to handle subject distribution and
@@ -1731,3 +1733,44 @@ func getAllNodes(l *level, results *SublistResult) {
getAllNodes(n.next, results)
}
}
// IntersectStree will match all items in the given subject tree that
// have interest expressed in the given sublist. The callback will only be called
// once for each subject, regardless of overlapping subscriptions in the sublist.
func IntersectStree[T any](st *stree.SubjectTree[T], sl *Sublist, cb func(subj []byte, entry *T)) {
	// Stack-allocated scratch buffer used to build candidate subjects while
	// walking the sublist levels; avoids a heap allocation per call for
	// subjects up to 255 bytes.
	var _subj [255]byte
	intersectStree(st, sl.root, _subj[:0], cb)
}
// intersectStree recursively walks the sublist levels rooted at r, building up
// the candidate subject in subj token by token, and runs a match against the
// subject tree st at the points where the sublist can no longer narrow the
// search (leaf level or a full wildcard).
func intersectStree[T any](st *stree.SubjectTree[T], r *level, subj []byte, cb func(subj []byte, entry *T)) {
	if r.numNodes() == 0 {
		// No deeper levels in the sublist; match the prefix built so far.
		st.Match(subj, cb)
		return
	}
	// Append a token separator unless we are still at the first token.
	nsubj := subj
	if len(nsubj) > 0 {
		nsubj = append(subj, '.')
	}
	switch {
	case r.fwc != nil:
		// We've reached a full wildcard, do a FWC match on the stree at this point
		// and don't keep iterating downward.
		nsubj := append(nsubj, '>')
		st.Match(nsubj, cb)
	case r.pwc != nil:
		// We've found a partial wildcard. We'll keep iterating downwards, but first
		// check whether there's interest at this level (without triggering dupes) and
		// match if so.
		nsubj := append(nsubj, '*')
		if len(r.pwc.psubs)+len(r.pwc.qsubs) > 0 && r.pwc.next != nil && r.pwc.next.numNodes() > 0 {
			st.Match(nsubj, cb)
		}
		intersectStree(st, r.pwc.next, nsubj, cb)
	case r.numNodes() > 0:
		// Normal node with subject literals, keep iterating.
		// NOTE: each iteration re-appends its token onto the same base slice,
		// which is safe because the previous iteration's suffix is consumed
		// before the next token overwrites it.
		for t, n := range r.nodes {
			nsubj := append(nsubj, t...)
			intersectStree(st, n.next, nsubj, cb)
		}
	}
}

View File

@@ -67,7 +67,6 @@ const (
wsCloseStatusProtocolError = 1002
wsCloseStatusUnsupportedData = 1003
wsCloseStatusNoStatusReceived = 1005
wsCloseStatusAbnormalClosure = 1006
wsCloseStatusInvalidPayloadData = 1007
wsCloseStatusPolicyViolation = 1008
wsCloseStatusMessageTooBig = 1009
@@ -458,9 +457,21 @@ func (c *client) wsHandleControlFrame(r *wsReadInfo, frameType wsOpCode, nc io.R
}
}
}
clm := wsCreateCloseMessage(status, body)
// If the status indicates that nothing was received, then we don't
// send anything back.
// From https://datatracker.ietf.org/doc/html/rfc6455#section-7.4
// it says that code 1005 is a reserved value and MUST NOT be set as a
// status code in a Close control frame by an endpoint. It is
// designated for use in applications expecting a status code to indicate
// that no status code was actually present.
var clm []byte
if status != wsCloseStatusNoStatusReceived {
clm = wsCreateCloseMessage(status, body)
}
c.wsEnqueueControlMessage(wsCloseMessage, clm)
nbPoolPut(clm) // wsEnqueueControlMessage has taken a copy.
if len(clm) > 0 {
nbPoolPut(clm) // wsEnqueueControlMessage has taken a copy.
}
// Return io.EOF so that readLoop will close the connection as ClientClosed
// after processing pending buffers.
return pos, io.EOF
@@ -647,10 +658,11 @@ func (c *client) wsEnqueueCloseMessage(reason ClosedState) {
status = wsCloseStatusProtocolError
case MaxPayloadExceeded:
status = wsCloseStatusMessageTooBig
case ServerShutdown:
case WriteError, ReadError, StaleConnection, ServerShutdown:
// We used to have WriteError, ReadError and StaleConnection result in
// code 1006, which the spec says that it must not be used to set the
// status in the close message. So using this one instead.
status = wsCloseStatusGoingAway
case WriteError, ReadError, StaleConnection:
status = wsCloseStatusAbnormalClosure
default:
status = wsCloseStatusInternalSrvError
}
@@ -1316,7 +1328,19 @@ func (c *client) wsCollapsePtoNB() (net.Buffers, int64) {
}
var csz int
for _, b := range nb {
cp.Write(b)
for len(b) > 0 {
n, err := cp.Write(b)
if err != nil {
if err == io.EOF {
break
}
c.Errorf("Error during compression: %v", err)
c.markConnAsClosed(WriteError)
nbPoolPut(b)
return nil, 0
}
b = b[n:]
}
nbPoolPut(b) // No longer needed as contents written to compressor.
}
if err := cp.Flush(); err != nil {

View File

@@ -1,4 +1,4 @@
// Copyright 2018-2022 The NATS Authors
// Copyright 2018-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -15,10 +15,9 @@ package nkeys
import (
"bytes"
"crypto/ed25519"
"crypto/rand"
"io"
"golang.org/x/crypto/ed25519"
)
// kp is the internal struct for a kepypair using seed.
@@ -31,7 +30,7 @@ const seedLen = 32
// CreatePair will create a KeyPair based on the rand entropy and a type/prefix byte.
func CreatePair(prefix PrefixByte) (KeyPair, error) {
return CreatePairWithRand(prefix, rand.Reader)
return CreatePairWithRand(prefix, nil)
}
// CreatePair will create a KeyPair based on the rand reader and a type/prefix byte. rand can be nil.
@@ -39,17 +38,12 @@ func CreatePairWithRand(prefix PrefixByte, rr io.Reader) (KeyPair, error) {
if prefix == PrefixByteCurve {
return CreateCurveKeysWithRand(rr)
}
if rr == nil {
rr = rand.Reader
}
var rawSeed [seedLen]byte
_, err := io.ReadFull(rr, rawSeed[:])
_, priv, err := ed25519.GenerateKey(rr)
if err != nil {
return nil, err
}
seed, err := EncodeSeed(prefix, rawSeed[:])
seed, err := EncodeSeed(prefix, priv.Seed())
if err != nil {
return nil, err
}

View File

@@ -1,4 +1,4 @@
// Copyright 2018 The NATS Authors
// Copyright 2018-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -14,10 +14,9 @@
package nkeys
import (
"crypto/ed25519"
"crypto/rand"
"io"
"golang.org/x/crypto/ed25519"
)
// A KeyPair from a public key capable of verifying only.

View File

@@ -1,4 +1,4 @@
// Copyright 2022-2023 The NATS Authors
// Copyright 2022-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -15,6 +15,7 @@ package nkeys
import (
"bytes"
"crypto/ed25519"
"crypto/rand"
"encoding/binary"
"io"
@@ -40,17 +41,18 @@ type ckp struct {
// CreateCurveKeys will create a Curve typed KeyPair.
func CreateCurveKeys() (KeyPair, error) {
return CreateCurveKeysWithRand(rand.Reader)
return CreateCurveKeysWithRand(nil)
}
// CreateCurveKeysWithRand will create a Curve typed KeyPair
// with specified rand source.
func CreateCurveKeysWithRand(rr io.Reader) (KeyPair, error) {
var kp ckp
_, err := io.ReadFull(rr, kp.seed[:])
_, priv, err := ed25519.GenerateKey(rr)
if err != nil {
return nil, err
}
kp.seed = [curveKeyLen]byte(priv.Seed())
return &kp, nil
}

View File

@@ -1,39 +0,0 @@
// Package semver is a thin forwarding layer on top of
// [golang.org/x/mod/semver]. See that package for documentation.
//
// Deprecated: use [golang.org/x/mod/semver] instead.
package semver
import "golang.org/x/mod/semver"
// IsValid forwards to [semver.IsValid] in golang.org/x/mod/semver.
func IsValid(v string) bool {
	return semver.IsValid(v)
}
// Canonical forwards to [semver.Canonical] in golang.org/x/mod/semver.
func Canonical(v string) string {
	return semver.Canonical(v)
}
// Major forwards to [semver.Major] in golang.org/x/mod/semver.
func Major(v string) string {
	return semver.Major(v)
}
// MajorMinor forwards to [semver.MajorMinor] in golang.org/x/mod/semver.
func MajorMinor(v string) string {
	return semver.MajorMinor(v)
}
// Prerelease forwards to [semver.Prerelease] in golang.org/x/mod/semver.
func Prerelease(v string) string {
	return semver.Prerelease(v)
}
// Build forwards to [semver.Build] in golang.org/x/mod/semver.
func Build(v string) string {
	return semver.Build(v)
}
// Compare forwards to [semver.Compare] in golang.org/x/mod/semver.
func Compare(v, w string) int {
	return semver.Compare(v, w)
}
// Max forwards to [semver.Max] in golang.org/x/mod/semver.
func Max(v, w string) string {
	return semver.Max(v, w)
}

11
vendor/modules.txt vendored
View File

@@ -1006,11 +1006,11 @@ github.com/mschoch/smat
# github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
## explicit
github.com/munnerz/goautoneg
# github.com/nats-io/jwt/v2 v2.5.8
## explicit; go 1.18
# github.com/nats-io/jwt/v2 v2.7.3
## explicit; go 1.22
github.com/nats-io/jwt/v2
# github.com/nats-io/nats-server/v2 v2.10.22
## explicit; go 1.21.0
# github.com/nats-io/nats-server/v2 v2.10.24
## explicit; go 1.22
github.com/nats-io/nats-server/v2/conf
github.com/nats-io/nats-server/v2/internal/fastrand
github.com/nats-io/nats-server/v2/internal/ldap
@@ -1028,7 +1028,7 @@ github.com/nats-io/nats.go
github.com/nats-io/nats.go/encoders/builtin
github.com/nats-io/nats.go/internal/parser
github.com/nats-io/nats.go/util
# github.com/nats-io/nkeys v0.4.7
# github.com/nats-io/nkeys v0.4.9
## explicit; go 1.20
github.com/nats-io/nkeys
# github.com/nats-io/nuid v1.0.1
@@ -1679,7 +1679,6 @@ github.com/rogpeppe/go-internal/internal/syscall/windows
github.com/rogpeppe/go-internal/internal/syscall/windows/sysdll
github.com/rogpeppe/go-internal/lockedfile
github.com/rogpeppe/go-internal/lockedfile/internal/filelock
github.com/rogpeppe/go-internal/semver
# github.com/rs/cors v1.11.1
## explicit; go 1.13
github.com/rs/cors