fix(protocol): avoid deadlock with concurrent connection start and close (#10140)

This commit is contained in:
Jakob Borg
2025-05-29 16:56:58 +02:00
committed by GitHub
parent 40660c5fb7
commit 3bd2bff23b
2 changed files with 41 additions and 11 deletions

View File

@@ -2384,8 +2384,14 @@ func (m *model) scheduleConnectionPromotion() {
// be called after adding new connections, and after closing a primary
// device connection.
func (m *model) promoteConnections() {
// Slice of actions to take on connections after releasing the main
// mutex. We do this so that we do not perform blocking network actions
// inside the loop, and also to avoid a possible deadlock with calling
// Start() on connections that are already executing a Close() with a
// callback into the model...
var postLockActions []func()
m.mut.Lock()
defer m.mut.Unlock()
for deviceID, connIDs := range m.deviceConnIDs {
cm, passwords := m.generateClusterConfigRLocked(deviceID)
@@ -2398,11 +2404,13 @@ func (m *model) promoteConnections() {
// on where we get ClusterConfigs from the peer.)
conn := m.connections[connIDs[0]]
l.Debugf("Promoting connection to %s at %s", deviceID.Short(), conn)
if conn.Statistics().StartedAt.IsZero() {
conn.SetFolderPasswords(passwords)
conn.Start()
}
conn.ClusterConfig(cm)
postLockActions = append(postLockActions, func() {
if conn.Statistics().StartedAt.IsZero() {
conn.SetFolderPasswords(passwords)
conn.Start()
}
conn.ClusterConfig(cm)
})
m.promotedConnID[deviceID] = connIDs[0]
}
@@ -2411,12 +2419,20 @@ func (m *model) promoteConnections() {
for _, connID := range connIDs[1:] {
conn := m.connections[connID]
if conn.Statistics().StartedAt.IsZero() {
conn.SetFolderPasswords(passwords)
conn.Start()
conn.ClusterConfig(&protocol.ClusterConfig{Secondary: true})
postLockActions = append(postLockActions, func() {
conn.SetFolderPasswords(passwords)
conn.Start()
conn.ClusterConfig(&protocol.ClusterConfig{Secondary: true})
})
}
}
}
m.mut.Unlock()
for _, action := range postLockActions {
action()
}
}
func (m *model) DownloadProgress(conn protocol.Connection, p *protocol.DownloadProgress) error {

View File

@@ -269,6 +269,15 @@ func newRawConnection(deviceID DeviceID, reader io.Reader, writer io.Writer, clo
func (c *rawConnection) Start() {
c.startStopMut.Lock()
defer c.startStopMut.Unlock()
select {
case <-c.closed:
// we have already closed the connection before starting processing
// on it.
return
default:
}
c.loopWG.Add(5)
go func() {
c.readerLoop()
@@ -291,6 +300,7 @@ func (c *rawConnection) Start() {
c.pingReceiver()
c.loopWG.Done()
}()
c.startTime = time.Now().Truncate(time.Second)
close(c.started)
}
@@ -950,9 +960,9 @@ func (c *rawConnection) Close(err error) {
// internalClose is called if there is an unexpected error during normal operation.
func (c *rawConnection) internalClose(err error) {
c.startStopMut.Lock()
defer c.startStopMut.Unlock()
c.closeOnce.Do(func() {
c.startStopMut.Lock()
l.Debugf("close connection to %s at %s due to %v", c.deviceID.Short(), c.ConnectionInfo, err)
if cerr := c.closer.Close(); cerr != nil {
l.Debugf("failed to close underlying conn %s at %s %v:", c.deviceID.Short(), c.ConnectionInfo, cerr)
@@ -974,6 +984,10 @@ func (c *rawConnection) internalClose(err error) {
<-c.dispatcherLoopStopped
}
c.startStopMut.Unlock()
// We don't want to call into the model while holding the
// startStopMut.
c.model.Closed(err)
})
}