Compare commits

..

6 Commits

Author SHA1 Message Date
LocalAI [bot]
fcecc12e57 chore: ⬆️ Update ggml-org/llama.cpp to ba3b9c8844aca35ecb40d31886686326f22d2214 (#8613)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-02-21 09:57:04 +01:00
Ettore Di Giacinto
51902df7ba fix: merge openresponses messages (#8615)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-21 09:56:43 +01:00
Ettore Di Giacinto
05f3ae31de chore: drop bark.cpp leftovers from pipelines (#8614)
Update bump_deps.yaml

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-02-21 09:24:09 +01:00
LocalAI [bot]
bb0924dff1 chore: ⬆️ Update ggml-org/llama.cpp to b908baf1825b1a89afef87b09e22c32af2ca6548 (#8612)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-02-20 23:47:47 +01:00
Richard Palethorpe
51eec4e6b8 feat(traces): Add backend traces (#8609)
Signed-off-by: Richard Palethorpe <io@richiejp.com>
2026-02-20 23:47:33 +01:00
LocalAI [bot]
462c82fad2 docs: ⬆️ update docs version mudler/LocalAI (#8611)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-02-20 21:19:51 +00:00
16 changed files with 1076 additions and 27 deletions

View File

@@ -18,10 +18,6 @@ jobs:
variable: "WHISPER_CPP_VERSION"
branch: "master"
file: "backend/go/whisper/Makefile"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
file: "Makefile"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master"

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=11c325c6e0666a30590cde390d5746a405e536b9
LLAMA_VERSION?=ba3b9c8844aca35ecb40d31886686326f22d2214
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -2,8 +2,10 @@ package backend
import (
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc"
model "github.com/mudler/LocalAI/pkg/model"
@@ -53,7 +55,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, modelConf
}
}
return func() ([]float32, error) {
wrappedFn := func() ([]float32, error) {
embeds, err := fn()
if err != nil {
return embeds, err
@@ -67,5 +69,48 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, modelConf
}
}
return embeds, nil
}, nil
}
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
traceData := map[string]any{
"input_text": trace.TruncateString(s, 1000),
"input_tokens_count": len(tokens),
}
startTime := time.Now()
originalFn := wrappedFn
wrappedFn = func() ([]float32, error) {
result, err := originalFn()
duration := time.Since(startTime)
traceData["embedding_dimensions"] = len(result)
errStr := ""
if err != nil {
errStr = err.Error()
}
summary := trace.TruncateString(s, 200)
if summary == "" {
summary = fmt.Sprintf("tokens[%d]", len(tokens))
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: duration,
Type: trace.BackendTraceEmbedding,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: summary,
Error: errStr,
Data: traceData,
})
return result, err
}
}
return wrappedFn, nil
}

View File

@@ -1,7 +1,10 @@
package backend
import (
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
@@ -36,6 +39,46 @@ func ImageGeneration(height, width, step, seed int, positive_prompt, negative_pr
return err
}
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
traceData := map[string]any{
"positive_prompt": positive_prompt,
"negative_prompt": negative_prompt,
"height": height,
"width": width,
"step": step,
"seed": seed,
"source_image": src,
"destination": dst,
}
startTime := time.Now()
originalFn := fn
fn = func() error {
err := originalFn()
duration := time.Since(startTime)
errStr := ""
if err != nil {
errStr = err.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: duration,
Type: trace.BackendTraceImageGeneration,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(positive_prompt, 200),
Error: errStr,
Data: traceData,
})
return err
}
}
return fn, nil
}

View File

@@ -7,11 +7,13 @@ import (
"slices"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/mudler/xlog"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
@@ -220,6 +222,84 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
}
}
if o.EnableTracing {
trace.InitBackendTracingIfEnabled(o.TracingMaxItems)
traceData := map[string]any{
"prompt": s,
"use_tokenizer_template": c.TemplateConfig.UseTokenizerTemplate,
"chat_template": c.TemplateConfig.Chat,
"function_template": c.TemplateConfig.Functions,
"grammar": c.Grammar,
"stop_words": c.StopWords,
"streaming": tokenCallback != nil,
"images_count": len(images),
"videos_count": len(videos),
"audios_count": len(audios),
}
if len(messages) > 0 {
if msgJSON, err := json.Marshal(messages); err == nil {
traceData["messages"] = string(msgJSON)
}
}
if tools != "" {
traceData["tools"] = tools
}
if toolChoice != "" {
traceData["tool_choice"] = toolChoice
}
if reasoningJSON, err := json.Marshal(c.ReasoningConfig); err == nil {
traceData["reasoning_config"] = string(reasoningJSON)
}
traceData["functions_config"] = map[string]any{
"grammar_disabled": c.FunctionsConfig.GrammarConfig.NoGrammar,
"parallel_calls": c.FunctionsConfig.GrammarConfig.ParallelCalls,
"mixed_mode": c.FunctionsConfig.GrammarConfig.MixedMode,
"xml_format_preset": c.FunctionsConfig.XMLFormatPreset,
}
if c.Temperature != nil {
traceData["temperature"] = *c.Temperature
}
if c.TopP != nil {
traceData["top_p"] = *c.TopP
}
if c.Maxtokens != nil {
traceData["max_tokens"] = *c.Maxtokens
}
startTime := time.Now()
originalFn := fn
fn = func() (LLMResponse, error) {
resp, err := originalFn()
duration := time.Since(startTime)
traceData["response"] = resp.Response
traceData["token_usage"] = map[string]any{
"prompt": resp.Usage.Prompt,
"completion": resp.Usage.Completion,
}
errStr := ""
if err != nil {
errStr = err.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: duration,
Type: trace.BackendTraceLLM,
ModelName: c.Name,
Backend: c.Backend,
Summary: trace.GenerateLLMSummary(messages, s),
Error: errStr,
Data: traceData,
})
return resp, err
}
}
return fn, nil
}

View File

@@ -3,8 +3,10 @@ package backend
import (
"context"
"fmt"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
)
@@ -20,7 +22,35 @@ func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *
return nil, fmt.Errorf("could not load rerank model")
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
res, err := rerankModel.Rerank(context.Background(), request)
if appConfig.EnableTracing {
errStr := ""
if err != nil {
errStr = err.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceRerank,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(request.Query, 200),
Error: errStr,
Data: map[string]any{
"query": request.Query,
"documents_count": len(request.Documents),
"top_n": request.TopN,
},
})
}
return res, err
}

View File

@@ -5,8 +5,10 @@ import (
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
@@ -92,7 +94,51 @@ func SoundGeneration(
req.Instrumental = instrumental
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
res, err := soundGenModel.SoundGeneration(context.Background(), req)
if appConfig.EnableTracing {
errStr := ""
if err != nil {
errStr = err.Error()
} else if res != nil && !res.Success {
errStr = fmt.Sprintf("sound generation error: %s", res.Message)
}
summary := trace.TruncateString(text, 200)
if summary == "" && caption != "" {
summary = trace.TruncateString(caption, 200)
}
traceData := map[string]any{
"text": text,
"caption": caption,
"lyrics": lyrics,
}
if duration != nil {
traceData["duration"] = *duration
}
if temperature != nil {
traceData["temperature"] = *temperature
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceSoundGeneration,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: summary,
Error: errStr,
Data: traceData,
})
}
if err != nil {
return "", nil, err
}

View File

@@ -1,7 +1,10 @@
package backend
import (
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/model"
@@ -21,8 +24,41 @@ func ModelTokenize(s string, loader *model.ModelLoader, modelConfig config.Model
predictOptions := gRPCPredictOpts(modelConfig, loader.ModelPath)
predictOptions.Prompt = s
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
// tokenize the string
resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
if appConfig.EnableTracing {
errStr := ""
if err != nil {
errStr = err.Error()
}
tokenCount := 0
if resp.Tokens != nil {
tokenCount = len(resp.Tokens)
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTokenize,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(s, 200),
Error: errStr,
Data: map[string]any{
"input_text": trace.TruncateString(s, 1000),
"token_count": tokenCount,
},
})
}
if err != nil {
return schema.TokenizeResponse{}, err
}

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
@@ -28,6 +29,12 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
return nil, fmt.Errorf("could not load transcription model")
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
Dst: audio,
Language: language,
@@ -37,6 +44,24 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
Prompt: prompt,
})
if err != nil {
if appConfig.EnableTracing {
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTranscription,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(audio, 200),
Error: err.Error(),
Data: map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
},
})
}
return nil, err
}
tr := &schema.TranscriptionResult{
@@ -57,5 +82,26 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt
Speaker: s.Speaker,
})
}
if appConfig.EnableTracing {
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTranscription,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(audio+" -> "+tr.Text, 200),
Data: map[string]any{
"audio_file": audio,
"language": language,
"translate": translate,
"diarize": diarize,
"prompt": prompt,
"result_text": tr.Text,
"segments_count": len(tr.Segments),
},
})
}
return tr, err
}

View File

@@ -8,8 +8,10 @@ import (
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
laudio "github.com/mudler/LocalAI/pkg/audio"
"github.com/mudler/LocalAI/pkg/grpc/proto"
@@ -60,6 +62,12 @@ func ModelTTS(
modelPath = modelConfig.Model // skip this step if it fails?????
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
@@ -67,6 +75,31 @@ func ModelTTS(
Dst: filePath,
Language: &language,
})
if appConfig.EnableTracing {
errStr := ""
if err != nil {
errStr = err.Error()
} else if !res.Success {
errStr = fmt.Sprintf("TTS error: %s", res.Message)
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTTS,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(text, 200),
Error: errStr,
Data: map[string]any{
"text": text,
"voice": voice,
"language": language,
},
})
}
if err != nil {
return "", nil, err
}
@@ -115,6 +148,12 @@ func ModelTTSStream(
modelPath = modelConfig.Model // skip this step if it fails?????
}
var startTime time.Time
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
startTime = time.Now()
}
var sampleRate uint32 = 16000 // default
headerSent := false
var callbackErr error
@@ -171,6 +210,34 @@ func ModelTTSStream(
}
})
resultErr := err
if callbackErr != nil {
resultErr = callbackErr
}
if appConfig.EnableTracing {
errStr := ""
if resultErr != nil {
errStr = resultErr.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: time.Since(startTime),
Type: trace.BackendTraceTTS,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(text, 200),
Error: errStr,
Data: map[string]any{
"text": text,
"voice": voice,
"language": language,
"streaming": true,
},
})
}
if callbackErr != nil {
return callbackErr
}

View File

@@ -1,7 +1,10 @@
package backend
import (
"time"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
@@ -37,5 +40,46 @@ func VideoGeneration(height, width int32, prompt, negativePrompt, startImage, en
return err
}
if appConfig.EnableTracing {
trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
traceData := map[string]any{
"prompt": prompt,
"negative_prompt": negativePrompt,
"height": height,
"width": width,
"num_frames": numFrames,
"fps": fps,
"seed": seed,
"cfg_scale": cfgScale,
"step": step,
}
startTime := time.Now()
originalFn := fn
fn = func() error {
err := originalFn()
duration := time.Since(startTime)
errStr := ""
if err != nil {
errStr = err.Error()
}
trace.RecordBackendTrace(trace.BackendTrace{
Timestamp: startTime,
Duration: duration,
Type: trace.BackendTraceVideoGeneration,
ModelName: modelConfig.Name,
Backend: modelConfig.Backend,
Summary: trace.TruncateString(prompt, 200),
Error: errStr,
Data: traceData,
})
return err
}
}
return fn, nil
}

View File

@@ -279,6 +279,18 @@ func convertORInputToMessages(input interface{}, cfg *config.ModelConfig) ([]sch
return nil, err
}
messages = append(messages, msg)
case "reasoning":
msg, err := convertORReasoningItemToMessage(itemMap)
if err != nil {
return nil, err
}
messages = append(messages, msg)
case "function_call":
msg, err := convertORFunctionCallItemToMessage(itemMap)
if err != nil {
return nil, err
}
messages = append(messages, msg)
case "function_call_output":
// Convert function call output to tool role message
callID, _ := itemMap["call_id"].(string)
@@ -323,12 +335,59 @@ func convertORInputToMessages(input interface{}, cfg *config.ModelConfig) ([]sch
messages = append(messages, msg)
}
}
return messages, nil
return mergeContiguousAssistantMessages(messages), nil
default:
return nil, fmt.Errorf("unsupported input type: %T", input)
}
}
// convertORReasoningItemToMessage converts an Open Responses reasoning item to an assistant Message fragment (for merging).
// The item's "content" may be a plain string or a list of content parts; only
// parts typed "output_text" or "input_text" contribute to the reasoning text.
func convertORReasoningItemToMessage(itemMap map[string]interface{}) (schema.Message, error) {
	var reasoning string
	switch content := itemMap["content"].(type) {
	case string:
		reasoning = content
	case []interface{}:
		for _, raw := range content {
			part, ok := raw.(map[string]interface{})
			if !ok {
				continue
			}
			kind, _ := part["type"].(string)
			if kind != "output_text" && kind != "input_text" {
				continue
			}
			if txt, ok := part["text"].(string); ok {
				reasoning += txt
			}
		}
	}
	return schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)}, nil
}
// convertORFunctionCallItemToMessage converts an Open Responses function_call item to an assistant Message fragment (for merging).
// When the item carries no call_id, a synthetic one is derived from the function name.
func convertORFunctionCallItemToMessage(itemMap map[string]interface{}) (schema.Message, error) {
	fnName, _ := itemMap["name"].(string)
	fnArgs, _ := itemMap["arguments"].(string)
	id, _ := itemMap["call_id"].(string)
	if id == "" {
		id = fmt.Sprintf("call_%s", fnName)
	}
	call := schema.ToolCall{
		Index:        0,
		ID:           id,
		Type:         "function",
		FunctionCall: schema.FunctionCall{Name: fnName, Arguments: fnArgs},
	}
	return schema.Message{Role: "assistant", ToolCalls: []schema.ToolCall{call}}, nil
}
// stringPtr returns a pointer to s, or nil when s is empty.
func stringPtr(s string) *string {
	if s != "" {
		return &s
	}
	return nil
}
// convertORItemToMessage converts a single ORItemField to a Message
// responseID is the ID of the response where this item was found (for logging/debugging)
func convertORItemToMessage(item *schema.ORItemField, responseID string) (schema.Message, error) {
@@ -366,19 +425,52 @@ func convertORItemToMessage(item *schema.ORItemField, responseID string) (schema
Content: outputStr,
StringContent: outputStr,
}, nil
case "reasoning":
reasoning := extractReasoningContentFromORItem(item)
return schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)}, nil
case "function_call":
callID := item.CallID
if callID == "" {
callID = fmt.Sprintf("call_%s", item.Name)
}
return schema.Message{
Role: "assistant",
ToolCalls: []schema.ToolCall{{
Index: 0,
ID: callID,
Type: "function",
FunctionCall: schema.FunctionCall{Name: item.Name, Arguments: item.Arguments},
}},
}, nil
default:
return schema.Message{}, fmt.Errorf("unsupported item type for conversion: %s (from response %s)", item.Type, responseID)
}
}
// convertOROutputItemsToMessages converts Open Responses output items to internal Messages
// extractReasoningContentFromORItem pulls the reasoning text out of an Open
// Responses item. The item's Content may be a plain string or a slice of
// content parts; only "output_text"/"input_text" parts are concatenated.
// Any other content shape yields the empty string.
func extractReasoningContentFromORItem(item *schema.ORItemField) string {
	switch content := item.Content.(type) {
	case string:
		return content
	case []schema.ORContentPart:
		var text string
		for _, part := range content {
			switch part.Type {
			case "output_text", "input_text":
				text += part.Text
			}
		}
		return text
	default:
		return ""
	}
}
// convertOROutputItemsToMessages converts Open Responses output items to internal Messages.
// Contiguous assistant items (message, reasoning, function_call) are merged into a single message.
func convertOROutputItemsToMessages(outputItems []schema.ORItemField) ([]schema.Message, error) {
var messages []schema.Message
for _, item := range outputItems {
switch item.Type {
case "message":
// Convert message item to assistant message
var textContent string
if contentParts, ok := item.Content.([]schema.ORContentPart); ok && len(contentParts) > 0 {
for _, part := range contentParts {
@@ -392,9 +484,23 @@ func convertOROutputItemsToMessages(outputItems []schema.ORItemField) ([]schema.
StringContent: textContent,
Content: textContent,
})
case "reasoning":
reasoning := extractReasoningContentFromORItem(&item)
messages = append(messages, schema.Message{Role: "assistant", Reasoning: stringPtr(reasoning)})
case "function_call":
// Function calls are handled separately - they become tool calls in the next turn
// For now, we skip them as they're part of the model's output, not input
msg := schema.Message{
Role: "assistant",
ToolCalls: []schema.ToolCall{{
Index: 0,
ID: item.CallID,
Type: "function",
FunctionCall: schema.FunctionCall{Name: item.Name, Arguments: item.Arguments},
}},
}
if msg.ToolCalls[0].ID == "" {
msg.ToolCalls[0].ID = fmt.Sprintf("call_%s", item.Name)
}
messages = append(messages, msg)
case "function_call_output":
// Convert function call output to tool role message
var outputStr string
@@ -414,7 +520,74 @@ func convertOROutputItemsToMessages(outputItems []schema.ORItemField) ([]schema.
}
}
return messages, nil
return mergeContiguousAssistantMessages(messages), nil
}
// mergeContiguousAssistantMessages merges contiguous assistant messages into one.
// Many chat templates expect content, reasoning, and tool calls in a single assistant message
// (see e.g. llama.cpp PR 19773). This avoids creating separate messages per input item.
//
// Non-assistant messages act as merge boundaries: they flush the current
// accumulator and pass through unchanged, preserving overall message order.
func mergeContiguousAssistantMessages(messages []schema.Message) []schema.Message {
	if len(messages) == 0 {
		return messages
	}
	var out []schema.Message
	// acc accumulates fields of the current run of assistant messages; nil
	// means no run is open. flushAssistantAccumulator appends and resets it.
	var acc *schema.Message
	for i := range messages {
		// Take a pointer to avoid copying the struct per iteration.
		m := &messages[i]
		if m.Role != "assistant" {
			// End of an assistant run: emit the merged message (if non-empty),
			// then pass the non-assistant message through as-is.
			flushAssistantAccumulator(&out, &acc)
			out = append(out, *m)
			continue
		}
		if acc == nil {
			acc = &schema.Message{Role: "assistant"}
		}
		if m.StringContent != "" {
			// Join textual content fragments with newlines.
			if acc.StringContent != "" {
				acc.StringContent += "\n" + m.StringContent
			} else {
				acc.StringContent = m.StringContent
			}
			// Keep Content in sync: adopt the first fragment's Content verbatim;
			// once a second string fragment arrives, fall back to the joined text.
			if acc.Content == nil {
				acc.Content = m.Content
			} else if _, ok := m.Content.(string); ok {
				acc.Content = acc.StringContent
			}
		}
		if m.Reasoning != nil && *m.Reasoning != "" {
			// Concatenate reasoning fragments with newlines; allocate a fresh
			// string so we never mutate the input message's pointee.
			if acc.Reasoning == nil {
				acc.Reasoning = m.Reasoning
			} else {
				combined := *acc.Reasoning + "\n" + *m.Reasoning
				acc.Reasoning = &combined
			}
		}
		if len(m.ToolCalls) > 0 {
			// Tool calls are collected in order; indices are renumbered at flush time.
			acc.ToolCalls = append(acc.ToolCalls, m.ToolCalls...)
		}
	}
	// Flush any trailing assistant run.
	flushAssistantAccumulator(&out, &acc)
	return out
}
// flushAssistantAccumulator appends the accumulated assistant message to out
// (if the accumulator holds anything) and resets the accumulator to nil.
// Empty accumulators — no content, no reasoning, no tool calls — are dropped.
// Tool-call indices are renumbered sequentially, since each merged fragment
// may have carried index 0.
func flushAssistantAccumulator(out *[]schema.Message, acc **schema.Message) {
	if acc == nil || *acc == nil {
		return
	}
	merged := *acc
	*acc = nil
	hasReasoning := merged.Reasoning != nil && *merged.Reasoning != ""
	if merged.StringContent == "" && !hasReasoning && len(merged.ToolCalls) == 0 {
		// Nothing accumulated worth emitting.
		return
	}
	if merged.Content == nil {
		merged.Content = merged.StringContent
	}
	for idx := range merged.ToolCalls {
		merged.ToolCalls[idx].Index = idx
	}
	*out = append(*out, *merged)
}
// convertORMessageItem converts an Open Responses message item to internal Message

View File

@@ -7,6 +7,7 @@ import (
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model"
)
@@ -430,4 +431,13 @@ func RegisterUIRoutes(app *echo.Echo,
return c.NoContent(204)
})
app.GET("/api/backend-traces", func(c echo.Context) error {
return c.JSON(200, trace.GetBackendTraces())
})
app.POST("/api/backend-traces/clear", func(c echo.Context) error {
trace.ClearBackendTraces()
return c.NoContent(204)
})
}

View File

@@ -5,7 +5,7 @@
<body class="bg-[var(--color-bg-primary)] text-[var(--color-text-primary)]">
<div class="app-layout">
{{template "views/partials/navbar" .}}
<main class="main-content">
<div class="main-content-inner" x-data="tracesApp()" x-init="init()">
@@ -40,10 +40,10 @@
<div class="hero-section">
<div class="hero-content">
<h1 class="hero-title">
API Traces
Traces
</h1>
<p class="hero-subtitle">View logged API requests and responses</p>
<div class="flex flex-wrap justify-center gap-2">
<p class="hero-subtitle">View logged API requests, responses, and backend operations</p>
<div class="flex flex-wrap justify-center gap-2" x-show="activeTab === 'api'">
<button type="button" @click="clearTraces()" class="inline-flex items-center gap-1.5 text-xs text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] bg-transparent hover:bg-[var(--color-primary)]/10 border border-[var(--color-border-subtle)] hover:border-[var(--color-primary)]/30 rounded-md py-1.5 px-2.5 transition-colors">
<i class="fas fa-trash text-[10px]"></i>
<span>Clear Traces</span>
@@ -53,16 +53,42 @@
<span>Export Traces</span>
</a>
</div>
<div class="flex flex-wrap justify-center gap-2" x-show="activeTab === 'backend'">
<button type="button" @click="clearBackendTraces()" class="inline-flex items-center gap-1.5 text-xs text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] bg-transparent hover:bg-[var(--color-primary)]/10 border border-[var(--color-border-subtle)] hover:border-[var(--color-primary)]/30 rounded-md py-1.5 px-2.5 transition-colors">
<i class="fas fa-trash text-[10px]"></i>
<span>Clear Backend Traces</span>
</button>
<a href="/api/backend-traces" download="backend-traces.json" class="inline-flex items-center gap-1.5 text-xs text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] bg-transparent hover:bg-[var(--color-primary)]/10 border border-[var(--color-border-subtle)] hover:border-[var(--color-primary)]/30 rounded-md py-1.5 px-2.5 transition-colors">
<i class="fas fa-download text-[10px]"></i>
<span>Export Backend Traces</span>
</a>
</div>
</div>
</div>
<!-- Tab Bar -->
<div class="flex border-b border-[var(--color-border-subtle)] mb-6">
<button @click="switchTab('api')"
:class="activeTab === 'api' ? 'border-[var(--color-primary)] text-[var(--color-primary)]' : 'border-transparent text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]'"
class="px-4 py-2 text-sm font-medium border-b-2 transition-colors">
<i class="fas fa-exchange-alt mr-1.5 text-xs"></i>API Traces
<span class="ml-1 text-xs opacity-70" x-text="'(' + traces.length + ')'"></span>
</button>
<button @click="switchTab('backend')"
:class="activeTab === 'backend' ? 'border-[var(--color-primary)] text-[var(--color-primary)]' : 'border-transparent text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]'"
class="px-4 py-2 text-sm font-medium border-b-2 transition-colors">
<i class="fas fa-cogs mr-1.5 text-xs"></i>Backend Traces
<span class="ml-1 text-xs opacity-70" x-text="'(' + backendTraces.length + ')'"></span>
</button>
</div>
<!-- Tracing Settings -->
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-primary)]/20 rounded-lg p-6 mb-8">
<h2 class="text-xl font-semibold text-[var(--color-text-primary)] mb-4 flex items-center">
<i class="fas fa-bug mr-2 text-[var(--color-primary)] text-sm"></i>
Tracing Settings
</h2>
<p class="text-xs text-[var(--color-text-secondary)] mb-4">Configure API tracing</p>
<p class="text-xs text-[var(--color-text-secondary)] mb-4">Configure API and backend tracing</p>
<div class="space-y-4">
<!-- Enable Tracing -->
@@ -103,8 +129,8 @@
</div>
</div>
<!-- Traces Table -->
<div class="mt-8">
<!-- API Traces Table -->
<div class="mt-8" x-show="activeTab === 'api'">
<div class="overflow-x-auto">
<table class="w-full border-collapse">
<thead>
@@ -130,14 +156,67 @@
</template>
</tbody>
</table>
<div x-show="traces.length === 0" class="text-center py-8 text-[var(--color-text-secondary)] text-sm">
No API traces recorded yet.
</div>
</div>
</div>
<!-- Details Modal -->
<!-- Backend Traces Table -->
<div class="mt-8" x-show="activeTab === 'backend'">
<div class="overflow-x-auto">
<table class="w-full border-collapse">
<thead>
<tr class="border-b border-[var(--color-bg-secondary)]">
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Type</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Timestamp</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Model</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Summary</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Duration</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Status</th>
<th class="text-right p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Actions</th>
</tr>
</thead>
<tbody>
<template x-for="(trace, index) in backendTraces" :key="index">
<tr class="hover:bg-[var(--color-bg-secondary)]/50 border-b border-[var(--color-bg-secondary)] transition-colors">
<td class="p-2">
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium"
:class="getTypeClass(trace.type)"
x-text="trace.type"></span>
</td>
<td class="p-2 text-xs text-[var(--color-text-secondary)]" x-text="formatTimestamp(trace.timestamp)"></td>
<td class="p-2 text-sm" x-text="trace.model_name || '-'"></td>
<td class="p-2 text-sm max-w-xs truncate" x-text="trace.summary || '-'"></td>
<td class="p-2 text-xs text-[var(--color-text-secondary)]" x-text="formatDuration(trace.duration)"></td>
<td class="p-2">
<template x-if="!trace.error">
<i class="fas fa-check-circle text-green-500 text-xs"></i>
</template>
<template x-if="trace.error">
<i class="fas fa-times-circle text-red-500 text-xs" :title="trace.error"></i>
</template>
</td>
<td class="p-2 text-right">
<button @click="showBackendDetails(index)" class="text-[var(--color-primary)]/60 hover:text-[var(--color-primary)] hover:bg-[var(--color-primary)]/10 rounded p-1 transition-colors">
<i class="fas fa-eye text-xs"></i>
</button>
</td>
</tr>
</template>
</tbody>
</table>
<div x-show="backendTraces.length === 0" class="text-center py-8 text-[var(--color-text-secondary)] text-sm">
No backend traces recorded yet.
</div>
</div>
</div>
<!-- API Trace Details Modal -->
<div x-show="selectedTrace !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-50" @click="selectedTrace = null">
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
<div class="flex justify-between mb-4">
<h2 class="h3">Trace Details</h2>
<h2 class="h3">API Trace Details</h2>
<button @click="selectedTrace = null" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
<i class="fas fa-times"></i>
</button>
@@ -155,6 +234,96 @@
</div>
</div>
<!-- Backend Trace Details Modal -->
<div x-show="selectedBackendTrace !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-50" @click="selectedBackendTrace = null; detailKey = null; detailValue = null;">
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
<template x-if="selectedBackendTrace !== null">
<div>
<div class="flex justify-between mb-4">
<h2 class="h3">Backend Trace Details</h2>
<button @click="selectedBackendTrace = null; detailKey = null; detailValue = null;" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
<i class="fas fa-times"></i>
</button>
</div>
<!-- Header info -->
<div class="grid grid-cols-4 gap-4 mb-4">
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Type</div>
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium"
:class="getTypeClass(backendTraces[selectedBackendTrace].type)"
x-text="backendTraces[selectedBackendTrace].type"></span>
</div>
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Model</div>
<div class="text-sm font-medium" x-text="backendTraces[selectedBackendTrace].model_name || '-'"></div>
</div>
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Backend</div>
<div class="text-sm font-medium" x-text="backendTraces[selectedBackendTrace].backend || '-'"></div>
</div>
<div class="bg-[var(--color-bg-primary)] rounded p-3">
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Duration</div>
<div class="text-sm font-medium" x-text="formatDuration(backendTraces[selectedBackendTrace].duration)"></div>
</div>
</div>
<!-- Error banner -->
<div x-show="backendTraces[selectedBackendTrace].error" class="bg-red-500/10 border border-red-500/30 rounded-lg p-3 mb-4">
<div class="flex items-center gap-2">
<i class="fas fa-exclamation-triangle text-red-500 text-sm"></i>
<span class="text-sm text-red-400" x-text="backendTraces[selectedBackendTrace].error"></span>
</div>
</div>
<!-- Data fields table -->
<div class="overflow-x-auto">
<table class="w-full border-collapse">
<thead>
<tr class="border-b border-[var(--color-bg-primary)]">
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)] w-1/4">Field</th>
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Value</th>
</tr>
</thead>
<tbody>
<template x-for="[key, value] in getDataEntries(selectedBackendTrace)" :key="key">
<tr class="border-b border-[var(--color-bg-primary)] hover:bg-[var(--color-bg-primary)]/50 transition-colors">
<td class="p-2 text-sm font-mono text-[var(--color-primary)]" x-text="key"></td>
<td class="p-2 text-sm">
<template x-if="isLargeValue(value)">
<button @click="showValueDetail(key, value)"
class="text-left max-w-full">
<span class="block truncate max-w-lg text-[var(--color-text-secondary)]" x-text="truncateValue(value, 120)"></span>
<span class="text-xs text-[var(--color-primary)] hover:underline mt-0.5 inline-block">View full value</span>
</button>
</template>
<template x-if="!isLargeValue(value)">
<span class="font-mono text-xs" x-text="formatValue(value)"></span>
</template>
</td>
</tr>
</template>
</tbody>
</table>
</div>
</div>
</template>
</div>
</div>
<!-- Value Detail Modal -->
<div x-show="detailValue !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-[60]" @click="detailValue = null; detailKey = null;">
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
<div class="flex justify-between mb-4">
<h2 class="h3 font-mono" x-text="detailKey"></h2>
<button @click="detailValue = null; detailKey = null;" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
<i class="fas fa-times"></i>
</button>
</div>
<div id="detailEditor" class="h-[70vh] border border-[var(--color-primary-border)]/20"></div>
</div>
</div>
</div>
@@ -176,21 +345,44 @@
<script>
function tracesApp() {
return {
// Which tab is visible: 'api' or 'backend'.
activeTab: 'api',
// API request/response traces fetched from /api/traces.
traces: [],
// Backend operation traces fetched from /api/backend-traces.
backendTraces: [],
// Index into `traces` of the trace shown in the API details modal, or null.
selectedTrace: null,
// Index into `backendTraces` shown in the backend details modal, or null.
selectedBackendTrace: null,
// Field name / formatted value shown in the nested value-detail modal.
detailKey: null,
detailValue: null,
// CodeMirror editor instances (created lazily when a modal opens).
requestEditor: null,
responseEditor: null,
detailEditor: null,
notifications: [],
// Tracing settings mirrored from the server.
settings: {
enable_tracing: false,
tracing_max_items: 0
},
// True while a settings save request is in flight.
saving: false,
// Handle returned by setInterval for the polling loop.
refreshInterval: null,
init() {
this.loadTracingSettings();
this.fetchTraces();
setInterval(() => this.fetchTraces(), 5000);
this.fetchBackendTraces();
this.startAutoRefresh();
},
// Switch the visible tab ('api' or 'backend'); the polling loop in
// startAutoRefresh reads activeTab to decide which list to refresh.
switchTab(tab) {
this.activeTab = tab;
},
startAutoRefresh() {
if (this.refreshInterval) clearInterval(this.refreshInterval);
this.refreshInterval = setInterval(() => {
if (this.activeTab === 'api') {
this.fetchTraces();
} else {
this.fetchBackendTraces();
}
}, 5000);
},
async loadTracingSettings() {
@@ -261,17 +453,37 @@ function tracesApp() {
},
async fetchTraces() {
const response = await fetch('/api/traces');
this.traces = await response.json();
try {
const response = await fetch('/api/traces');
this.traces = await response.json();
} catch (e) {
console.error('Error fetching API traces:', e);
}
},
async fetchBackendTraces() {
try {
const response = await fetch('/api/backend-traces');
this.backendTraces = await response.json();
} catch (e) {
console.error('Error fetching backend traces:', e);
}
},
async clearTraces() {
if (confirm('Clear all traces?')) {
if (confirm('Clear all API traces?')) {
await fetch('/api/traces/clear', { method: 'POST' });
this.traces = [];
}
},
async clearBackendTraces() {
if (confirm('Clear all backend traces?')) {
await fetch('/api/backend-traces/clear', { method: 'POST' });
this.backendTraces = [];
}
},
showDetails(index) {
this.selectedTrace = index;
this.$nextTick(() => {
@@ -326,6 +538,97 @@ function tracesApp() {
this.responseEditor.setValue(resBody);
}
});
},
// Open the backend-trace details modal for the trace at the given index.
showBackendDetails(index) {
this.selectedBackendTrace = index;
},
showValueDetail(key, value) {
this.detailKey = key;
let formatted = '';
if (typeof value === 'string') {
try {
const parsed = JSON.parse(value);
formatted = JSON.stringify(parsed, null, 2);
} catch {
formatted = value;
}
} else if (typeof value === 'object') {
formatted = JSON.stringify(value, null, 2);
} else {
formatted = String(value);
}
this.detailValue = formatted;
this.$nextTick(() => {
const el = document.getElementById('detailEditor');
if (el) {
el.innerHTML = '';
this.detailEditor = CodeMirror(el, {
value: formatted,
mode: 'javascript',
json: true,
theme: 'default',
lineNumbers: true,
readOnly: true,
lineWrapping: true
});
}
});
},
formatTimestamp(ts) {
if (!ts) return '-';
const d = new Date(ts);
return d.toLocaleTimeString() + '.' + String(d.getMilliseconds()).padStart(3, '0');
},
formatDuration(ns) {
if (!ns) return '-';
const ms = ns / 1000000;
if (ms < 1000) return ms.toFixed(1) + 'ms';
return (ms / 1000).toFixed(2) + 's';
},
getTypeClass(type) {
const classes = {
'llm': 'bg-blue-500/20 text-blue-400',
'embedding': 'bg-purple-500/20 text-purple-400',
'transcription': 'bg-yellow-500/20 text-yellow-400',
'image_generation': 'bg-green-500/20 text-green-400',
'video_generation': 'bg-pink-500/20 text-pink-400',
'tts': 'bg-orange-500/20 text-orange-400',
'sound_generation': 'bg-teal-500/20 text-teal-400',
'rerank': 'bg-indigo-500/20 text-indigo-400',
'tokenize': 'bg-gray-500/20 text-gray-400',
};
return classes[type] || 'bg-gray-500/20 text-gray-400';
},
isLargeValue(value) {
if (typeof value === 'string') return value.length > 120;
if (typeof value === 'object') return JSON.stringify(value).length > 120;
return false;
},
truncateValue(value, maxLen) {
let str = typeof value === 'object' ? JSON.stringify(value) : String(value);
if (str.length <= maxLen) return str;
return str.substring(0, maxLen) + '...';
},
formatValue(value) {
if (value === null || value === undefined) return 'null';
if (typeof value === 'boolean') return value ? 'true' : 'false';
if (typeof value === 'object') return JSON.stringify(value);
return String(value);
},
getDataEntries(index) {
const trace = this.backendTraces[index];
if (!trace || !trace.data) return [];
return Object.entries(trace.data);
}
}
}

130
core/trace/backend_trace.go Normal file
View File

@@ -0,0 +1,130 @@
package trace
import (
	"encoding/json"
	"sort"
	"sync"
	"time"
	"unicode/utf8"

	"github.com/emirpasic/gods/v2/queues/circularbuffer"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/xlog"
)
// BackendTraceType identifies which kind of backend operation a trace records.
type BackendTraceType string

// Known backend trace types, one per backend operation kind.
const (
	BackendTraceLLM             BackendTraceType = "llm"
	BackendTraceEmbedding       BackendTraceType = "embedding"
	BackendTraceTranscription   BackendTraceType = "transcription"
	BackendTraceImageGeneration BackendTraceType = "image_generation"
	BackendTraceVideoGeneration BackendTraceType = "video_generation"
	BackendTraceTTS             BackendTraceType = "tts"
	BackendTraceSoundGeneration BackendTraceType = "sound_generation"
	BackendTraceRerank          BackendTraceType = "rerank"
	BackendTraceTokenize        BackendTraceType = "tokenize"
)

// BackendTrace is a single recorded backend call. It is serialized to JSON
// for the /api/backend-traces endpoint consumed by the traces UI.
type BackendTrace struct {
	Timestamp time.Time        `json:"timestamp"`
	Duration  time.Duration    `json:"duration"` // nanoseconds on the wire (the UI divides by 1e6 for ms)
	Type      BackendTraceType `json:"type"`
	ModelName string           `json:"model_name"`
	Backend   string           `json:"backend"`
	Summary   string           `json:"summary"`
	Error     string           `json:"error,omitempty"`
	Data      map[string]any   `json:"data"` // free-form per-operation fields
}
// backendTraceBuffer holds the most recent traces; being circular, old
// entries are overwritten once capacity is reached. Nil until
// InitBackendTracingIfEnabled runs.
var backendTraceBuffer *circularbuffer.Queue[*BackendTrace]

// backendMu guards all access to backendTraceBuffer.
var backendMu sync.Mutex

// backendLogChan decouples trace producers from the single consumer
// goroutine that writes into the buffer; sends are non-blocking.
var backendLogChan = make(chan *BackendTrace, 100)

// backendInitOnce ensures the buffer and consumer goroutine are set up once.
var backendInitOnce sync.Once
// InitBackendTracingIfEnabled allocates the in-memory backend trace buffer
// and starts the goroutine that drains backendLogChan into it. It is safe to
// call multiple times, but only the first call has any effect (sync.Once), so
// a later call with a different maxItems is ignored.
//
// maxItems is the circular-buffer capacity; values <= 0 fall back to 100.
func InitBackendTracingIfEnabled(maxItems int) {
	backendInitOnce.Do(func() {
		if maxItems <= 0 {
			maxItems = 100
		}
		backendMu.Lock()
		backendTraceBuffer = circularbuffer.New[*BackendTrace](maxItems)
		backendMu.Unlock()
		// Consumer goroutine: moves queued traces into the buffer.
		// NOTE(review): backendLogChan is never closed, so this goroutine
		// runs for the process lifetime — presumably intentional for a
		// global trace sink.
		go func() {
			for t := range backendLogChan {
				backendMu.Lock()
				if backendTraceBuffer != nil {
					backendTraceBuffer.Enqueue(t)
				}
				backendMu.Unlock()
			}
		}()
	})
}
// RecordBackendTrace queues a trace for storage without blocking the caller.
// When the internal channel is full the trace is dropped with a warning.
func RecordBackendTrace(t BackendTrace) {
	queued := &t
	select {
	case backendLogChan <- queued:
		// Handed off to the background consumer.
	default:
		xlog.Warn("Backend trace channel full, dropping trace")
	}
}
// GetBackendTraces returns a snapshot of all recorded backend traces sorted
// oldest-first. The result is always non-nil (so it JSON-encodes as []), and
// is empty when tracing was never initialized.
func GetBackendTraces() []BackendTrace {
	// Copy the pointer slice out under the lock, then work on the copy.
	backendMu.Lock()
	var snapshot []*BackendTrace
	if backendTraceBuffer != nil {
		snapshot = backendTraceBuffer.Values()
	}
	backendMu.Unlock()

	out := make([]BackendTrace, 0, len(snapshot))
	for _, p := range snapshot {
		out = append(out, *p)
	}
	sort.Slice(out, func(a, b int) bool {
		return out[a].Timestamp.Before(out[b].Timestamp)
	})
	return out
}
// ClearBackendTraces discards every stored trace; it is a no-op when
// tracing was never initialized.
func ClearBackendTraces() {
	backendMu.Lock()
	defer backendMu.Unlock()
	if backendTraceBuffer == nil {
		return
	}
	backendTraceBuffer.Clear()
}
// GenerateLLMSummary builds a short human-readable summary for an LLM trace:
// the content of the last message when one is available (JSON-encoded if it
// is not a plain string), otherwise the raw prompt, truncated to 200 bytes.
// Returns "" when neither yields any text.
func GenerateLLMSummary(messages schema.Messages, prompt string) string {
	if n := len(messages); n > 0 {
		var text string
		switch c := messages[n-1].Content.(type) {
		case string:
			text = c
		default:
			if b, err := json.Marshal(c); err == nil {
				text = string(b)
			}
		}
		if text != "" {
			return TruncateString(text, 200)
		}
	}
	if prompt == "" {
		return ""
	}
	return TruncateString(prompt, 200)
}
// TruncateString shortens s to at most maxLen bytes, appending "..." when
// anything was cut. The cut point is backed off to the previous rune
// boundary so the result is always valid UTF-8 (a plain s[:maxLen] could
// split a multi-byte rune, emitting garbage into UI summaries). A negative
// maxLen is clamped to 0 instead of panicking on the slice expression.
func TruncateString(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	// Back off until s[cut] starts a rune (or we hit the beginning).
	cut := maxLen
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "..."
}

View File

@@ -1,3 +1,3 @@
{
"version": "v3.11.0"
"version": "v3.12.0"
}