feat(ui): Per model backend logs and various fixes (#9028)

* feat(gallery): Switch to expandable box instead of pop-over and display model files

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(ui, backends): Add individual backend logging

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

* fix(ui): Set the context settings from the model config

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-07-01 03:46:41 -04:00 · 2026-03-18 07:31:26 +00:00
parent eef808d921
commit 35d509d8e7
40 changed files with 1195 additions and 206 deletions
--- a/pkg/model/backend_log_store.go
+++ b/pkg/model/backend_log_store.go
@@ -0,0 +1,168 @@
+package model
+
+import (
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/emirpasic/gods/v2/queues/circularbuffer"
+)
+
+// BackendLogLine represents a single line of output from a backend process.
+// The struct tags define the lowercase field names used when it is encoded
+// as JSON.
+type BackendLogLine struct {
+	Timestamp time.Time `json:"timestamp"` // set to time.Now() by AppendLine when the line is recorded
+	Stream    string    `json:"stream"` // "stdout" or "stderr"
+	Text      string    `json:"text"` // line content exactly as passed to AppendLine
+}
+
+// backendLogBuffer wraps a circular buffer for a single model's logs
+// and tracks subscribers for real-time streaming.
+//
+// Every method in this file that touches queue, subscribers or nextSubID does
+// so while holding mu.
+type backendLogBuffer struct {
+	mu          sync.Mutex // guards all fields below
+	queue       *circularbuffer.Queue[BackendLogLine] // retained lines; created with the store's maxLines as its size
+	subscribers map[int]chan BackendLogLine // live subscriber channels keyed by subscription ID
+	nextSubID   int // ID that the next Subscribe call will be given
+}
+
+// BackendLogStore stores per-model backend process output in circular buffers
+// and supports real-time subscriptions for WebSocket streaming.
+//
+// Locking is two-level: mu guards only the buffers map, while each
+// backendLogBuffer carries its own mutex for its queue and subscribers.
+type BackendLogStore struct {
+	mu       sync.RWMutex // protects the buffers map only
+	buffers  map[string]*backendLogBuffer // one buffer per model ID, created lazily by getOrCreateBuffer
+	maxLines int // size passed to each model's circular buffer on creation
+}
+
+// NewBackendLogStore builds an empty BackendLogStore whose per-model buffers
+// hold at most maxLinesPerModel lines. A zero or negative value selects the
+// default of 1000 lines.
+func NewBackendLogStore(maxLinesPerModel int) *BackendLogStore {
+	store := &BackendLogStore{
+		buffers:  make(map[string]*backendLogBuffer),
+		maxLines: 1000,
+	}
+	if maxLinesPerModel > 0 {
+		store.maxLines = maxLinesPerModel
+	}
+	return store
+}
+
+// getOrCreateBuffer looks up the buffer registered for modelID and, when none
+// exists yet, allocates and registers a fresh one sized to s.maxLines.
+func (s *BackendLogStore) getOrCreateBuffer(modelID string) *backendLogBuffer {
+	// Common case: the buffer already exists, so a read lock is enough.
+	s.mu.RLock()
+	existing, found := s.buffers[modelID]
+	s.mu.RUnlock()
+	if found {
+		return existing
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Look again under the write lock: another goroutine may have registered
+	// the buffer between the RUnlock above and the Lock here.
+	if raced, found := s.buffers[modelID]; found {
+		return raced
+	}
+
+	created := &backendLogBuffer{
+		queue:       circularbuffer.New[BackendLogLine](s.maxLines),
+		subscribers: make(map[int]chan BackendLogLine),
+	}
+	s.buffers[modelID] = created
+	return created
+}
+
+// AppendLine records one line of output for modelID, stamping it with the
+// current time and creating the model's buffer on first use. The line is then
+// offered to every subscriber of that model with a non-blocking send.
+func (s *BackendLogStore) AppendLine(modelID, stream, text string) {
+	entry := BackendLogLine{Timestamp: time.Now(), Stream: stream, Text: text}
+
+	target := s.getOrCreateBuffer(modelID)
+	target.mu.Lock()
+	defer target.mu.Unlock()
+
+	target.queue.Enqueue(entry)
+	for _, sub := range target.subscribers {
+		select {
+		case sub <- entry:
+		default:
+			// The subscriber's channel is full; skip it instead of blocking.
+		}
+	}
+}
+
+// GetLines returns the lines currently held for modelID, as produced by the
+// queue's Values method. A model without a buffer yields an empty, non-nil
+// slice.
+func (s *BackendLogStore) GetLines(modelID string) []BackendLogLine {
+	s.mu.RLock()
+	target, found := s.buffers[modelID]
+	s.mu.RUnlock()
+
+	if found {
+		target.mu.Lock()
+		defer target.mu.Unlock()
+		return target.queue.Values()
+	}
+	return []BackendLogLine{}
+}
+
+// ListModels reports the IDs of all models that currently have a log buffer,
+// in ascending string order.
+func (s *BackendLogStore) ListModels() []string {
+	// Snapshot the keys under the read lock; sorting happens after it is
+	// released.
+	snapshot := func() []string {
+		s.mu.RLock()
+		defer s.mu.RUnlock()
+
+		ids := make([]string, 0, len(s.buffers))
+		for modelID := range s.buffers {
+			ids = append(ids, modelID)
+		}
+		return ids
+	}
+
+	ids := snapshot()
+	sort.Strings(ids)
+	return ids
+}
+
+// Clear empties the stored lines for modelID while leaving its buffer entry
+// (and any subscribers) in place. It does nothing for an unknown model.
+func (s *BackendLogStore) Clear(modelID string) {
+	s.mu.RLock()
+	target, found := s.buffers[modelID]
+	s.mu.RUnlock()
+
+	if found {
+		target.mu.Lock()
+		defer target.mu.Unlock()
+		target.queue.Clear()
+	}
+}
+
+// Remove deletes the buffer entry for a model entirely and closes every
+// subscriber channel registered on it. It does nothing for an unknown model.
+//
+// Each subscriber is also dropped from the buffer's subscriber map as it is
+// closed. Goroutines that obtained the buffer before Remove ran still hold a
+// pointer to it: the unsubscribe function returned by Subscribe closes its
+// channel whenever its ID is still registered, and AppendLine sends to every
+// registered channel. Leaving closed channels registered would therefore
+// panic on a second close or on a send to a closed channel.
+func (s *BackendLogStore) Remove(modelID string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	buf, ok := s.buffers[modelID]
+	if !ok {
+		return
+	}
+
+	buf.mu.Lock()
+	for id, ch := range buf.subscribers {
+		close(ch)
+		// Removing the current key while ranging over a map is permitted.
+		delete(buf.subscribers, id)
+	}
+	buf.mu.Unlock()
+
+	delete(s.buffers, modelID)
+}
+
+// Subscribe registers a new subscriber for modelID, creating the model's
+// buffer if it does not exist yet. It returns a channel with room for 100
+// buffered lines, on which AppendLine delivers new lines, together with an
+// unsubscribe function.
+//
+// The unsubscribe function removes the subscriber and closes the channel. It
+// acts only while the subscription ID is still registered, so calling it a
+// second time is a no-op.
+func (s *BackendLogStore) Subscribe(modelID string) (chan BackendLogLine, func()) {
+	buf := s.getOrCreateBuffer(modelID)
+	lines := make(chan BackendLogLine, 100)
+
+	buf.mu.Lock()
+	subID := buf.nextSubID
+	buf.subscribers[subID] = lines
+	buf.nextSubID = subID + 1
+	buf.mu.Unlock()
+
+	return lines, func() {
+		buf.mu.Lock()
+		defer buf.mu.Unlock()
+
+		if _, registered := buf.subscribers[subID]; !registered {
+			return
+		}
+		delete(buf.subscribers, subID)
+		close(lines)
+	}
+}
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -8,6 +8,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/mudler/LocalAI/pkg/system"
@@ -33,6 +34,8 @@ type ModelLoader struct {
 	lruEvictionMaxRetries    int           // Maximum number of retries when waiting for busy models
 	lruEvictionRetryInterval time.Duration // Interval between retries when waiting for busy models
 	onUnloadHooks            []ModelUnloadHook
+	backendLogs              *BackendLogStore
+	backendLoggingEnabled    atomic.Bool
 }

 // NewModelLoader creates a new ModelLoader instance.
@@ -45,6 +48,7 @@ func NewModelLoader(system *system.SystemState) *ModelLoader {
 		externalBackends:         make(map[string]string),
 		lruEvictionMaxRetries:    30,              // Default: 30 retries
 		lruEvictionRetryInterval: 1 * time.Second, // Default: 1 second
+		backendLogs:              NewBackendLogStore(1000),
 	}

 	return nml
@@ -72,6 +76,18 @@ func (ml *ModelLoader) GetWatchDog() *WatchDog {
 	return ml.wd
 }

+// BackendLogs returns the BackendLogStore held by this ModelLoader.
+func (ml *ModelLoader) BackendLogs() *BackendLogStore {
+	return ml.backendLogs
+}
+
+// SetBackendLoggingEnabled atomically stores the backend-logging flag. The
+// stdout/stderr tailing goroutines read this flag before appending a line to
+// the backend log store.
+func (ml *ModelLoader) SetBackendLoggingEnabled(enabled bool) {
+	ml.backendLoggingEnabled.Store(enabled)
+}
+
+// BackendLoggingEnabled atomically loads and returns the backend-logging flag.
+func (ml *ModelLoader) BackendLoggingEnabled() bool {
+	return ml.backendLoggingEnabled.Load()
+}
+
 // SetLRUEvictionRetrySettings updates the LRU eviction retry settings
 func (ml *ModelLoader) SetLRUEvictionRetrySettings(maxRetries int, retryInterval time.Duration) {
 	ml.mu.Lock()
--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@@ -159,19 +159,27 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
 	go func() {
 		t, err := tail.TailFile(grpcControlProcess.StderrPath(), tail.Config{Follow: true})
 		if err != nil {
-			xlog.Debug("Could not tail stderr")
+			xlog.Error("Could not tail stderr", "process", grpcProcess)
+			return
 		}
 		for line := range t.Lines {
 			xlog.Debug("GRPC stderr", "id", strings.Join([]string{id, serverAddress}, "-"), "line", line.Text)
+			if ml.backendLogs != nil && ml.backendLoggingEnabled.Load() {
+				ml.backendLogs.AppendLine(id, "stderr", line.Text)
+			}
 		}
 	}()
 	go func() {
 		t, err := tail.TailFile(grpcControlProcess.StdoutPath(), tail.Config{Follow: true})
 		if err != nil {
-			xlog.Debug("Could not tail stdout")
+			xlog.Error("Could not tail stdout", "process", grpcProcess)
+			return
 		}
 		for line := range t.Lines {
 			xlog.Debug("GRPC stdout", "id", strings.Join([]string{id, serverAddress}, "-"), "line", line.Text)
+			if ml.backendLogs != nil && ml.backendLoggingEnabled.Load() {
+				ml.backendLogs.AppendLine(id, "stdout", line.Text)
+			}
 		}
 	}()