mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-23 18:27:47 -05:00
Compare commits
10 Commits
v3.12.1
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3577b02298 | ||
|
|
ed0bfb8732 | ||
|
|
be84b1d258 | ||
|
|
cbedcc9091 | ||
|
|
e45d63c86e | ||
|
|
f40c8dd0ce | ||
|
|
559ab99890 | ||
|
|
91f2dd5820 | ||
|
|
8250815763 | ||
|
|
b1b67b973e |
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=ba3b9c8844aca35ecb40d31886686326f22d2214
|
||||
LLAMA_VERSION?=2b6dfe824de8600c061ef91ce5cc5c307f97112c
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.76.0
|
||||
grpcio==1.78.1
|
||||
protobuf==6.33.5
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
@@ -71,6 +71,7 @@ type RunCMD struct {
|
||||
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
||||
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||
WatchdogInterval string `env:"LOCALAI_WATCHDOG_INTERVAL,WATCHDOG_INTERVAL" default:"500ms" help:"Interval between watchdog checks (e.g., 500ms, 5s, 1m) (default: 500ms)" group:"backends"`
|
||||
EnableMemoryReclaimer bool `env:"LOCALAI_MEMORY_RECLAIMER,MEMORY_RECLAIMER,LOCALAI_GPU_RECLAIMER,GPU_RECLAIMER" default:"false" help:"Enable memory threshold monitoring to auto-evict backends when memory usage exceeds threshold (uses GPU VRAM if available, otherwise RAM)" group:"backends"`
|
||||
MemoryReclaimerThreshold float64 `env:"LOCALAI_MEMORY_RECLAIMER_THRESHOLD,MEMORY_RECLAIMER_THRESHOLD,LOCALAI_GPU_RECLAIMER_THRESHOLD,GPU_RECLAIMER_THRESHOLD" default:"0.95" help:"Memory usage threshold (0.0-1.0) that triggers backend eviction (default 0.95 = 95%%)" group:"backends"`
|
||||
ForceEvictionWhenBusy bool `env:"LOCALAI_FORCE_EVICTION_WHEN_BUSY,FORCE_EVICTION_WHEN_BUSY" default:"false" help:"Force eviction even when models have active API calls (default: false for safety)" group:"backends"`
|
||||
@@ -215,6 +216,13 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
opts = append(opts, config.SetWatchDogBusyTimeout(dur))
|
||||
}
|
||||
if r.WatchdogInterval != "" {
|
||||
dur, err := time.ParseDuration(r.WatchdogInterval)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts = append(opts, config.SetWatchDogInterval(dur))
|
||||
}
|
||||
}
|
||||
|
||||
// Handle memory reclaimer (uses GPU VRAM if available, otherwise RAM)
|
||||
|
||||
@@ -31,8 +31,8 @@ type TranscriptCMD struct {
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
BackendGalleries string `env:"LOCALAI_BACKEND_GALLERIES,BACKEND_GALLERIES" help:"JSON list of backend galleries" group:"backends" default:"${backends}"`
|
||||
Prompt string `short:"p" help:"Previous transcribed text or words that hint at what the model should expect"`
|
||||
ResponseFormat schema.TranscriptionResponseFormatType `short:"f" default:"" help:"Response format for Whisper models, can be one of (txt, lrc, srt, vtt, json, json_verbose)"`
|
||||
PrettyPrint bool `help:"Used with response_format json or json_verbose for pretty printing"`
|
||||
ResponseFormat schema.TranscriptionResponseFormatType `short:"f" default:"" help:"Response format for Whisper models, can be one of (txt, lrc, srt, vtt, json, verbose_json)"`
|
||||
PrettyPrint bool `help:"Used with response_format json or verbose_json for pretty printing"`
|
||||
}
|
||||
|
||||
func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
@@ -98,10 +98,11 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
|
||||
Context: context.Background(),
|
||||
UploadLimitMB: 15,
|
||||
Debug: true,
|
||||
AgentJobRetentionDays: 30, // Default: 30 days
|
||||
LRUEvictionMaxRetries: 30, // Default: 30 retries
|
||||
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
|
||||
TracingMaxItems: 1024,
|
||||
AgentJobRetentionDays: 30, // Default: 30 days
|
||||
LRUEvictionMaxRetries: 30, // Default: 30 retries
|
||||
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
|
||||
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
|
||||
TracingMaxItems: 1024,
|
||||
PathWithoutAuth: []string{
|
||||
"/static/",
|
||||
"/generated-audio/",
|
||||
@@ -208,6 +209,12 @@ func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func SetWatchDogInterval(t time.Duration) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.WatchDogInterval = t
|
||||
}
|
||||
}
|
||||
|
||||
// EnableMemoryReclaimer enables memory threshold monitoring.
|
||||
// When enabled, the watchdog will evict backends if memory usage exceeds the threshold.
|
||||
// Works with GPU VRAM if available, otherwise uses system RAM.
|
||||
@@ -642,7 +649,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
||||
AutoloadBackendGalleries: &autoloadBackendGalleries,
|
||||
ApiKeys: &apiKeys,
|
||||
AgentJobRetentionDays: &agentJobRetentionDays,
|
||||
OpenResponsesStoreTTL: &openResponsesStoreTTL,
|
||||
OpenResponsesStoreTTL: &openResponsesStoreTTL,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ import (
|
||||
//go:embed static/*
|
||||
var embedDirStatic embed.FS
|
||||
|
||||
var quietPaths = []string{"/api/operations", "/api/resources", "/healthz", "/readyz"}
|
||||
|
||||
// @title LocalAI API
|
||||
// @version 2.0.0
|
||||
// @description The LocalAI Rest API.
|
||||
@@ -109,10 +111,17 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
res := c.Response()
|
||||
err := next(c)
|
||||
|
||||
// Fix for #7989: Reduce log verbosity of Web UI polling and resources API
|
||||
// If the path is /api/operations or /api/resources and the request was successful (200),
|
||||
// we log it at DEBUG level (hidden by default) instead of INFO.
|
||||
if (req.URL.Path == "/api/operations" || req.URL.Path == "/api/resources") && res.Status == 200 {
|
||||
// Fix for #7989: Reduce log verbosity of Web UI polling, resources API, and health checks
|
||||
// These paths are logged at DEBUG level (hidden by default) instead of INFO.
|
||||
isQuietPath := false
|
||||
for _, path := range quietPaths {
|
||||
if req.URL.Path == path {
|
||||
isQuietPath = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if isQuietPath && res.Status == 200 {
|
||||
xlog.Debug("HTTP request", "method", req.Method, "path", req.URL.Path, "status", res.Status)
|
||||
} else {
|
||||
xlog.Info("HTTP request", "method", req.Method, "path", req.URL.Path, "status", res.Status)
|
||||
|
||||
@@ -1046,6 +1046,27 @@ func triggerResponse(session *Session, conv *Conversation, c *LockedWebsocket, o
|
||||
Content: content.Text,
|
||||
})
|
||||
}
|
||||
} else if item.FunctionCall != nil {
|
||||
conversationHistory = append(conversationHistory, schema.Message{
|
||||
Role: string(types.MessageRoleAssistant),
|
||||
ToolCalls: []schema.ToolCall{
|
||||
{
|
||||
ID: item.FunctionCall.CallID,
|
||||
Type: "function",
|
||||
FunctionCall: schema.FunctionCall{
|
||||
Name: item.FunctionCall.Name,
|
||||
Arguments: item.FunctionCall.Arguments,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
} else if item.FunctionCallOutput != nil {
|
||||
conversationHistory = append(conversationHistory, schema.Message{
|
||||
Role: "tool",
|
||||
Name: item.FunctionCallOutput.CallID,
|
||||
Content: item.FunctionCallOutput.Output,
|
||||
StringContent: item.FunctionCallOutput.Output,
|
||||
})
|
||||
}
|
||||
}
|
||||
conv.Lock.Unlock()
|
||||
|
||||
@@ -135,26 +135,44 @@
|
||||
<table class="w-full border-collapse">
|
||||
<thead>
|
||||
<tr class="border-b border-[var(--color-bg-secondary)]">
|
||||
<th class="w-8 p-2"></th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Method</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Path</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Status</th>
|
||||
<th class="text-right p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<template x-for="(trace, index) in traces" :key="index">
|
||||
<tr class="hover:bg-[var(--color-bg-secondary)]/50 border-b border-[var(--color-bg-secondary)] transition-colors">
|
||||
<template x-for="(trace, index) in traces" :key="index">
|
||||
<tbody>
|
||||
<tr @click="toggleTrace(index)"
|
||||
class="cursor-pointer hover:bg-[var(--color-bg-secondary)]/50 border-b border-[var(--color-bg-secondary)] transition-colors">
|
||||
<td class="p-2 w-8 text-center">
|
||||
<i class="fas fa-chevron-right text-xs text-[var(--color-text-secondary)] transition-transform duration-200"
|
||||
:class="expandedTraces[index] ? 'rotate-90' : ''"></i>
|
||||
</td>
|
||||
<td class="p-2" x-text="trace.request.method"></td>
|
||||
<td class="p-2" x-text="trace.request.path"></td>
|
||||
<td class="p-2" x-text="trace.response.status"></td>
|
||||
<td class="p-2 text-right">
|
||||
<button @click="showDetails(index)" class="text-[var(--color-primary)]/60 hover:text-[var(--color-primary)] hover:bg-[var(--color-primary)]/10 rounded p-1 transition-colors">
|
||||
<i class="fas fa-eye text-xs"></i>
|
||||
</button>
|
||||
</tr>
|
||||
<tr x-show="expandedTraces[index]">
|
||||
<td colspan="4" class="p-0">
|
||||
<div class="p-4 bg-[var(--color-bg-secondary)]/30 border-b border-[var(--color-bg-secondary)]">
|
||||
<div class="grid grid-cols-1 lg:grid-cols-2 gap-4">
|
||||
<div>
|
||||
<h4 class="text-sm font-semibold text-[var(--color-text-primary)] mb-2">Request Body</h4>
|
||||
<pre class="overflow-auto max-h-[70vh] p-3 rounded-lg bg-[var(--color-bg-primary)] border border-[var(--color-border-subtle)] text-xs font-mono text-[var(--color-text-secondary)] whitespace-pre-wrap break-words"
|
||||
x-text="formatTraceBody(trace.request.body)"></pre>
|
||||
</div>
|
||||
<div>
|
||||
<h4 class="text-sm font-semibold text-[var(--color-text-primary)] mb-2">Response Body</h4>
|
||||
<pre class="overflow-auto max-h-[70vh] p-3 rounded-lg bg-[var(--color-bg-primary)] border border-[var(--color-border-subtle)] text-xs font-mono text-[var(--color-text-secondary)] whitespace-pre-wrap break-words"
|
||||
x-text="formatTraceBody(trace.response.body)"></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</template>
|
||||
</tbody>
|
||||
</tbody>
|
||||
</template>
|
||||
</table>
|
||||
<div x-show="traces.length === 0" class="text-center py-8 text-[var(--color-text-secondary)] text-sm">
|
||||
No API traces recorded yet.
|
||||
@@ -168,18 +186,23 @@
|
||||
<table class="w-full border-collapse">
|
||||
<thead>
|
||||
<tr class="border-b border-[var(--color-bg-secondary)]">
|
||||
<th class="w-8 p-2"></th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Type</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Timestamp</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Model</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Summary</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Duration</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Status</th>
|
||||
<th class="text-right p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<template x-for="(trace, index) in backendTraces" :key="index">
|
||||
<tr class="hover:bg-[var(--color-bg-secondary)]/50 border-b border-[var(--color-bg-secondary)] transition-colors">
|
||||
<template x-for="(trace, index) in backendTraces" :key="index">
|
||||
<tbody>
|
||||
<tr @click="toggleBackendTrace(index)"
|
||||
class="cursor-pointer hover:bg-[var(--color-bg-secondary)]/50 border-b border-[var(--color-bg-secondary)] transition-colors">
|
||||
<td class="p-2 w-8 text-center">
|
||||
<i class="fas fa-chevron-right text-xs text-[var(--color-text-secondary)] transition-transform duration-200"
|
||||
:class="expandedBackendTraces[index] ? 'rotate-90' : ''"></i>
|
||||
</td>
|
||||
<td class="p-2">
|
||||
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium"
|
||||
:class="getTypeClass(trace.type)"
|
||||
@@ -197,14 +220,82 @@
|
||||
<i class="fas fa-times-circle text-red-500 text-xs" :title="trace.error"></i>
|
||||
</template>
|
||||
</td>
|
||||
<td class="p-2 text-right">
|
||||
<button @click="showBackendDetails(index)" class="text-[var(--color-primary)]/60 hover:text-[var(--color-primary)] hover:bg-[var(--color-primary)]/10 rounded p-1 transition-colors">
|
||||
<i class="fas fa-eye text-xs"></i>
|
||||
</button>
|
||||
</tr>
|
||||
<tr x-show="expandedBackendTraces[index]">
|
||||
<td colspan="7" class="p-0">
|
||||
<div class="p-4 bg-[var(--color-bg-secondary)]/30 border-b border-[var(--color-bg-secondary)]">
|
||||
<!-- Header info -->
|
||||
<div class="grid grid-cols-2 md:grid-cols-4 gap-3 mb-4">
|
||||
<div class="bg-[var(--color-bg-primary)] rounded-lg p-3 border border-[var(--color-border-subtle)]">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Type</div>
|
||||
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium"
|
||||
:class="getTypeClass(trace.type)"
|
||||
x-text="trace.type"></span>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded-lg p-3 border border-[var(--color-border-subtle)]">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Model</div>
|
||||
<div class="text-sm font-medium" x-text="trace.model_name || '-'"></div>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded-lg p-3 border border-[var(--color-border-subtle)]">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Backend</div>
|
||||
<div class="text-sm font-medium" x-text="trace.backend || '-'"></div>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded-lg p-3 border border-[var(--color-border-subtle)]">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Duration</div>
|
||||
<div class="text-sm font-medium" x-text="formatDuration(trace.duration)"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Error banner -->
|
||||
<div x-show="trace.error" class="bg-red-500/10 border border-red-500/30 rounded-lg p-3 mb-4">
|
||||
<div class="flex items-center gap-2">
|
||||
<i class="fas fa-exclamation-triangle text-red-500 text-sm"></i>
|
||||
<span class="text-sm text-red-400" x-text="trace.error"></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Data fields as nested accordions -->
|
||||
<template x-if="trace.data && Object.keys(trace.data).length > 0">
|
||||
<div>
|
||||
<h4 class="text-sm font-semibold text-[var(--color-text-primary)] mb-2">Data Fields</h4>
|
||||
<div class="border border-[var(--color-border-subtle)] rounded-lg overflow-hidden">
|
||||
<template x-for="[key, value] in Object.entries(trace.data)" :key="key">
|
||||
<div class="border-b border-[var(--color-border-subtle)] last:border-b-0">
|
||||
<!-- Field header row -->
|
||||
<div @click="isLargeValue(value) && toggleBackendField(index, key)"
|
||||
class="flex items-center gap-2 px-3 py-2 hover:bg-[var(--color-bg-primary)]/50 transition-colors"
|
||||
:class="isLargeValue(value) ? 'cursor-pointer' : ''">
|
||||
<template x-if="isLargeValue(value)">
|
||||
<i class="fas fa-chevron-right text-[10px] text-[var(--color-text-secondary)] transition-transform duration-200 w-3 flex-shrink-0"
|
||||
:class="isBackendFieldExpanded(index, key) ? 'rotate-90' : ''"></i>
|
||||
</template>
|
||||
<template x-if="!isLargeValue(value)">
|
||||
<span class="w-3 flex-shrink-0"></span>
|
||||
</template>
|
||||
<span class="text-sm font-mono text-[var(--color-primary)] flex-shrink-0" x-text="key"></span>
|
||||
<template x-if="!isLargeValue(value)">
|
||||
<span class="font-mono text-xs text-[var(--color-text-secondary)]" x-text="formatValue(value)"></span>
|
||||
</template>
|
||||
<template x-if="isLargeValue(value) && !isBackendFieldExpanded(index, key)">
|
||||
<span class="text-xs text-[var(--color-text-secondary)] truncate" x-text="truncateValue(value, 120)"></span>
|
||||
</template>
|
||||
</div>
|
||||
<!-- Expanded field value -->
|
||||
<div x-show="isLargeValue(value) && isBackendFieldExpanded(index, key)"
|
||||
class="px-3 pb-3">
|
||||
<pre class="overflow-auto max-h-[70vh] p-3 rounded-lg bg-[var(--color-bg-primary)] border border-[var(--color-border-subtle)] text-xs font-mono text-[var(--color-text-secondary)] whitespace-pre-wrap break-words"
|
||||
x-text="formatLargeValue(value)"></pre>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</template>
|
||||
</tbody>
|
||||
</tbody>
|
||||
</template>
|
||||
</table>
|
||||
<div x-show="backendTraces.length === 0" class="text-center py-8 text-[var(--color-text-secondary)] text-sm">
|
||||
No backend traces recorded yet.
|
||||
@@ -212,149 +303,20 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- API Trace Details Modal -->
|
||||
<div x-show="selectedTrace !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-50" @click="selectedTrace = null">
|
||||
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
|
||||
<div class="flex justify-between mb-4">
|
||||
<h2 class="h3">API Trace Details</h2>
|
||||
<button @click="selectedTrace = null" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<h3 class="text-lg font-semibold mb-2">Request Body</h3>
|
||||
<div id="requestEditor" class="h-96 border border-[var(--color-primary-border)]/20"></div>
|
||||
</div>
|
||||
<div>
|
||||
<h3 class="text-lg font-semibold mb-2">Response Body</h3>
|
||||
<div id="responseEditor" class="h-96 border border-[var(--color-primary-border)]/20"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Backend Trace Details Modal -->
|
||||
<div x-show="selectedBackendTrace !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-50" @click="selectedBackendTrace = null; detailKey = null; detailValue = null;">
|
||||
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
|
||||
<template x-if="selectedBackendTrace !== null">
|
||||
<div>
|
||||
<div class="flex justify-between mb-4">
|
||||
<h2 class="h3">Backend Trace Details</h2>
|
||||
<button @click="selectedBackendTrace = null; detailKey = null; detailValue = null;" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- Header info -->
|
||||
<div class="grid grid-cols-4 gap-4 mb-4">
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Type</div>
|
||||
<span class="inline-flex items-center px-2 py-0.5 rounded text-xs font-medium"
|
||||
:class="getTypeClass(backendTraces[selectedBackendTrace].type)"
|
||||
x-text="backendTraces[selectedBackendTrace].type"></span>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Model</div>
|
||||
<div class="text-sm font-medium" x-text="backendTraces[selectedBackendTrace].model_name || '-'"></div>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Backend</div>
|
||||
<div class="text-sm font-medium" x-text="backendTraces[selectedBackendTrace].backend || '-'"></div>
|
||||
</div>
|
||||
<div class="bg-[var(--color-bg-primary)] rounded p-3">
|
||||
<div class="text-xs text-[var(--color-text-secondary)] mb-1">Duration</div>
|
||||
<div class="text-sm font-medium" x-text="formatDuration(backendTraces[selectedBackendTrace].duration)"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Error banner -->
|
||||
<div x-show="backendTraces[selectedBackendTrace].error" class="bg-red-500/10 border border-red-500/30 rounded-lg p-3 mb-4">
|
||||
<div class="flex items-center gap-2">
|
||||
<i class="fas fa-exclamation-triangle text-red-500 text-sm"></i>
|
||||
<span class="text-sm text-red-400" x-text="backendTraces[selectedBackendTrace].error"></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Data fields table -->
|
||||
<div class="overflow-x-auto">
|
||||
<table class="w-full border-collapse">
|
||||
<thead>
|
||||
<tr class="border-b border-[var(--color-bg-primary)]">
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)] w-1/4">Field</th>
|
||||
<th class="text-left p-2 text-xs font-semibold text-[var(--color-text-secondary)]">Value</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<template x-for="[key, value] in getDataEntries(selectedBackendTrace)" :key="key">
|
||||
<tr class="border-b border-[var(--color-bg-primary)] hover:bg-[var(--color-bg-primary)]/50 transition-colors">
|
||||
<td class="p-2 text-sm font-mono text-[var(--color-primary)]" x-text="key"></td>
|
||||
<td class="p-2 text-sm">
|
||||
<template x-if="isLargeValue(value)">
|
||||
<button @click="showValueDetail(key, value)"
|
||||
class="text-left max-w-full">
|
||||
<span class="block truncate max-w-lg text-[var(--color-text-secondary)]" x-text="truncateValue(value, 120)"></span>
|
||||
<span class="text-xs text-[var(--color-primary)] hover:underline mt-0.5 inline-block">View full value</span>
|
||||
</button>
|
||||
</template>
|
||||
<template x-if="!isLargeValue(value)">
|
||||
<span class="font-mono text-xs" x-text="formatValue(value)"></span>
|
||||
</template>
|
||||
</td>
|
||||
</tr>
|
||||
</template>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Value Detail Modal -->
|
||||
<div x-show="detailValue !== null" class="fixed inset-0 bg-black/50 flex items-center justify-center z-[60]" @click="detailValue = null; detailKey = null;">
|
||||
<div class="bg-[var(--color-bg-secondary)] rounded-lg p-6 max-w-4xl w-full max-h-[90vh] overflow-auto" @click.stop>
|
||||
<div class="flex justify-between mb-4">
|
||||
<h2 class="h3 font-mono" x-text="detailKey"></h2>
|
||||
<button @click="detailValue = null; detailKey = null;" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)]">
|
||||
<i class="fas fa-times"></i>
|
||||
</button>
|
||||
</div>
|
||||
<div id="detailEditor" class="h-[70vh] border border-[var(--color-primary-border)]/20"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
<!-- CodeMirror -->
|
||||
<link rel="stylesheet" href="static/assets/codemirror.min.css">
|
||||
<script src="static/assets/codemirror.min.js"></script>
|
||||
<script src="static/assets/javascript.min.js"></script>
|
||||
|
||||
<!-- Styles from model-editor -->
|
||||
<style>
|
||||
.CodeMirror {
|
||||
height: 100% !important;
|
||||
font-family: monospace;
|
||||
}
|
||||
</style>
|
||||
|
||||
<script>
|
||||
function tracesApp() {
|
||||
return {
|
||||
activeTab: 'api',
|
||||
traces: [],
|
||||
backendTraces: [],
|
||||
selectedTrace: null,
|
||||
selectedBackendTrace: null,
|
||||
detailKey: null,
|
||||
detailValue: null,
|
||||
requestEditor: null,
|
||||
responseEditor: null,
|
||||
detailEditor: null,
|
||||
expandedTraces: {},
|
||||
expandedBackendTraces: {},
|
||||
expandedBackendFields: {},
|
||||
notifications: [],
|
||||
settings: {
|
||||
enable_tracing: false,
|
||||
@@ -474,6 +436,7 @@ function tracesApp() {
|
||||
if (confirm('Clear all API traces?')) {
|
||||
await fetch('/api/traces/clear', { method: 'POST' });
|
||||
this.traces = [];
|
||||
this.expandedTraces = {};
|
||||
}
|
||||
},
|
||||
|
||||
@@ -481,101 +444,67 @@ function tracesApp() {
|
||||
if (confirm('Clear all backend traces?')) {
|
||||
await fetch('/api/backend-traces/clear', { method: 'POST' });
|
||||
this.backendTraces = [];
|
||||
this.expandedBackendTraces = {};
|
||||
this.expandedBackendFields = {};
|
||||
}
|
||||
},
|
||||
|
||||
showDetails(index) {
|
||||
this.selectedTrace = index;
|
||||
this.$nextTick(() => {
|
||||
const trace = this.traces[index];
|
||||
|
||||
const decodeBase64 = (base64) => {
|
||||
const binaryString = atob(base64);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
return new TextDecoder().decode(bytes);
|
||||
};
|
||||
|
||||
const formatBody = (bodyText) => {
|
||||
try {
|
||||
const json = JSON.parse(bodyText);
|
||||
return JSON.stringify(json, null, 2);
|
||||
} catch {
|
||||
return bodyText;
|
||||
}
|
||||
};
|
||||
|
||||
const reqBody = formatBody(decodeBase64(trace.request.body));
|
||||
const resBody = formatBody(decodeBase64(trace.response.body));
|
||||
|
||||
if (!this.requestEditor) {
|
||||
this.requestEditor = CodeMirror(document.getElementById('requestEditor'), {
|
||||
value: reqBody,
|
||||
mode: 'javascript',
|
||||
json: true,
|
||||
theme: 'default',
|
||||
lineNumbers: true,
|
||||
readOnly: true,
|
||||
lineWrapping: true
|
||||
});
|
||||
} else {
|
||||
this.requestEditor.setValue(reqBody);
|
||||
}
|
||||
|
||||
if (!this.responseEditor) {
|
||||
this.responseEditor = CodeMirror(document.getElementById('responseEditor'), {
|
||||
value: resBody,
|
||||
mode: 'javascript',
|
||||
json: true,
|
||||
theme: 'default',
|
||||
lineNumbers: true,
|
||||
readOnly: true,
|
||||
lineWrapping: true
|
||||
});
|
||||
} else {
|
||||
this.responseEditor.setValue(resBody);
|
||||
}
|
||||
});
|
||||
toggleTrace(index) {
|
||||
this.expandedTraces = {
|
||||
...this.expandedTraces,
|
||||
[index]: !this.expandedTraces[index]
|
||||
};
|
||||
},
|
||||
|
||||
showBackendDetails(index) {
|
||||
this.selectedBackendTrace = index;
|
||||
toggleBackendTrace(index) {
|
||||
this.expandedBackendTraces = {
|
||||
...this.expandedBackendTraces,
|
||||
[index]: !this.expandedBackendTraces[index]
|
||||
};
|
||||
},
|
||||
|
||||
showValueDetail(key, value) {
|
||||
this.detailKey = key;
|
||||
let formatted = '';
|
||||
toggleBackendField(index, key) {
|
||||
const fieldKey = index + '-' + key;
|
||||
this.expandedBackendFields = {
|
||||
...this.expandedBackendFields,
|
||||
[fieldKey]: !this.expandedBackendFields[fieldKey]
|
||||
};
|
||||
},
|
||||
|
||||
isBackendFieldExpanded(index, key) {
|
||||
return !!this.expandedBackendFields[index + '-' + key];
|
||||
},
|
||||
|
||||
formatTraceBody(body) {
|
||||
try {
|
||||
const binaryString = atob(body);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
const text = new TextDecoder().decode(bytes);
|
||||
try {
|
||||
return JSON.stringify(JSON.parse(text), null, 2);
|
||||
} catch {
|
||||
return text;
|
||||
}
|
||||
} catch {
|
||||
return body || '';
|
||||
}
|
||||
},
|
||||
|
||||
formatLargeValue(value) {
|
||||
if (typeof value === 'string') {
|
||||
try {
|
||||
const parsed = JSON.parse(value);
|
||||
formatted = JSON.stringify(parsed, null, 2);
|
||||
return JSON.stringify(JSON.parse(value), null, 2);
|
||||
} catch {
|
||||
formatted = value;
|
||||
return value;
|
||||
}
|
||||
} else if (typeof value === 'object') {
|
||||
formatted = JSON.stringify(value, null, 2);
|
||||
} else {
|
||||
formatted = String(value);
|
||||
}
|
||||
this.detailValue = formatted;
|
||||
|
||||
this.$nextTick(() => {
|
||||
const el = document.getElementById('detailEditor');
|
||||
if (el) {
|
||||
el.innerHTML = '';
|
||||
this.detailEditor = CodeMirror(el, {
|
||||
value: formatted,
|
||||
mode: 'javascript',
|
||||
json: true,
|
||||
theme: 'default',
|
||||
lineNumbers: true,
|
||||
readOnly: true,
|
||||
lineWrapping: true
|
||||
});
|
||||
}
|
||||
});
|
||||
if (typeof value === 'object') {
|
||||
return JSON.stringify(value, null, 2);
|
||||
}
|
||||
return String(value);
|
||||
},
|
||||
|
||||
formatTimestamp(ts) {
|
||||
@@ -623,12 +552,6 @@ function tracesApp() {
|
||||
if (typeof value === 'boolean') return value ? 'true' : 'false';
|
||||
if (typeof value === 'object') return JSON.stringify(value);
|
||||
return String(value);
|
||||
},
|
||||
|
||||
getDataEntries(index) {
|
||||
const trace = this.backendTraces[index];
|
||||
if (!trace || !trace.data) return [];
|
||||
return Object.entries(trace.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,7 +115,7 @@ const (
|
||||
TranscriptionResponseFormatVtt = TranscriptionResponseFormatType("vtt")
|
||||
TranscriptionResponseFormatLrc = TranscriptionResponseFormatType("lrc")
|
||||
TranscriptionResponseFormatJson = TranscriptionResponseFormatType("json")
|
||||
TranscriptionResponseFormatJsonVerbose = TranscriptionResponseFormatType("json_verbose")
|
||||
TranscriptionResponseFormatJsonVerbose = TranscriptionResponseFormatType("verbose_json")
|
||||
)
|
||||
|
||||
type ChatCompletionResponseFormat struct {
|
||||
|
||||
@@ -60,6 +60,22 @@ diffusers:
|
||||
scheduler_type: "k_dpmpp_sde"
|
||||
```
|
||||
|
||||
### Multi-GPU Support
|
||||
|
||||
For multi-GPU support with diffusers, you need to configure the model with `tensor_parallel_size` set to the number of GPUs you want to use.
|
||||
|
||||
```yaml
|
||||
name: stable-diffusion-multigpu
|
||||
model: stabilityai/stable-diffusion-xl-base-1.0
|
||||
backend: diffusers
|
||||
parameters:
|
||||
tensor_parallel_size: 2 # Number of GPUs to use
|
||||
```
|
||||
|
||||
The `tensor_parallel_size` parameter is set in the gRPC proto configuration (in `ModelOptions` message, field 55). When this is set to a value greater than 1, the diffusers backend automatically enables `device_map="auto"` to distribute the model across multiple GPUs.
|
||||
|
||||
When using diffusers with multiple GPUs, ensure you have sufficient GPU memory across all devices. The model will be automatically distributed across available GPUs. For optimal performance, use GPUs of the same type and memory capacity.
|
||||
|
||||
## CUDA(NVIDIA) acceleration
|
||||
|
||||
### Requirements
|
||||
|
||||
@@ -57,7 +57,7 @@ Result:
|
||||
|
||||
---
|
||||
|
||||
You can also specify the `response_format` parameter to be one of `lrc`, `srt`, `vtt`, `text`, `json` or `json_verbose` (default):
|
||||
You can also specify the `response_format` parameter to be one of `lrc`, `srt`, `vtt`, `text`, `json` or `verbose_json` (default):
|
||||
```bash
|
||||
## Send the example audio file to the transcriptions endpoint
|
||||
curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@$PWD/gb1.ogg" -F model="whisper-1" -F response_format="srt"
|
||||
|
||||
@@ -46,6 +46,7 @@ Complete reference for all LocalAI command-line interface (CLI) parameters and e
|
||||
| `--watchdog-idle-timeout` | `15m` | Threshold beyond which an idle backend should be stopped | `$LOCALAI_WATCHDOG_IDLE_TIMEOUT`, `$WATCHDOG_IDLE_TIMEOUT` |
|
||||
| `--enable-watchdog-busy` | `false` | Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout | `$LOCALAI_WATCHDOG_BUSY`, `$WATCHDOG_BUSY` |
|
||||
| `--watchdog-busy-timeout` | `5m` | Threshold beyond which a busy backend should be stopped | `$LOCALAI_WATCHDOG_BUSY_TIMEOUT`, `$WATCHDOG_BUSY_TIMEOUT` |
|
||||
| `--watchdog-interval` | `500ms` | Interval between watchdog checks (e.g., `500ms`, `5s`, `1m`) | `$LOCALAI_WATCHDOG_INTERVAL`, `$WATCHDOG_INTERVAL` |
|
||||
| `--force-eviction-when-busy` | `false` | Force eviction even when models have active API calls (default: false for safety). **Warning:** Enabling this can interrupt active requests | `$LOCALAI_FORCE_EVICTION_WHEN_BUSY`, `$FORCE_EVICTION_WHEN_BUSY` |
|
||||
| `--lru-eviction-max-retries` | `30` | Maximum number of retries when waiting for busy models to become idle before eviction | `$LOCALAI_LRU_EVICTION_MAX_RETRIES`, `$LRU_EVICTION_MAX_RETRIES` |
|
||||
| `--lru-eviction-retry-interval` | `1s` | Interval between retries when waiting for busy models to become idle (e.g., `1s`, `2s`) | `$LOCALAI_LRU_EVICTION_RETRY_INTERVAL`, `$LRU_EVICTION_RETRY_INTERVAL` |
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v3.12.0"
|
||||
"version": "v3.12.1"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user