From 1827e143cd93d5af8198be113cde58042cd1484f Mon Sep 17 00:00:00 2001 From: Pascal Bleser Date: Thu, 28 Aug 2025 16:44:51 +0200 Subject: [PATCH] enhancement(runtime): improve logging in supervised crashes When events occur in the supervisor, log them as FATAL when appropriate (panic and termination when not restarting) instead of always using INFO, and include more information such as the service name and the current failures, etc., rather than the generic message that was used so far. --- opencloud/pkg/runtime/service/service.go | 28 ++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/opencloud/pkg/runtime/service/service.go b/opencloud/pkg/runtime/service/service.go index 16ff8656fb..d3ff48dab2 100644 --- a/opencloud/pkg/runtime/service/service.go +++ b/opencloud/pkg/runtime/service/service.go @@ -3,7 +3,6 @@ package service import ( "context" "errors" - "fmt" "net" "net/http" "net/rpc" @@ -382,7 +381,32 @@ func Start(ctx context.Context, o ...Option) error { cancel() } } - s.Log.Info().Str("event", e.String()).Msg(fmt.Sprintf("supervisor: %v", e.Map()["supervisor_name"])) + switch ev := e.(type) { + case suture.EventServicePanic: + l := s.Log.Fatal() + if ev.Restarting { + l = s.Log.Error() + } + l.Str("event", e.String()).Str("service", ev.ServiceName).Str("supervisor", ev.SupervisorName). + Bool("restarting", ev.Restarting).Float64("failures", ev.CurrentFailures).Float64("threshold", ev.FailureThreshold). + Str("message", ev.PanicMsg).Msg("service panic") + case suture.EventServiceTerminate: + l := s.Log.Fatal() + if ev.Restarting { + l = s.Log.Error() + } + l.Str("event", e.String()).Str("service", ev.ServiceName).Str("supervisor", ev.SupervisorName). + Bool("restarting", ev.Restarting).Float64("failures", ev.CurrentFailures).Float64("threshold", ev.FailureThreshold). 
+ Interface("error", ev.Err).Msg("service terminated") + case suture.EventBackoff: + s.Log.Warn().Str("event", e.String()).Str("supervisor", ev.SupervisorName).Msg("service backoff") + case suture.EventResume: + s.Log.Info().Str("event", e.String()).Str("supervisor", ev.SupervisorName).Msg("service resume") + case suture.EventStopTimeout: + s.Log.Warn().Str("event", e.String()).Str("service", ev.ServiceName).Str("supervisor", ev.SupervisorName).Msg("service stop timeout") + default: + s.Log.Warn().Str("event", e.String()).Msgf("supervisor: %v", e.Map()["supervisor_name"]) + } }, FailureThreshold: 5, FailureBackoff: 3 * time.Second,