mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-29 11:07:18 -04:00
* feat(usage): add Source, APIKeyID, APIKeyName columns to UsageRecord Adds three additive columns plus UsageSource* constants. The columns are auto-migrated by InitDB. APIKeyID is a nullable foreign reference to UserAPIKey.ID; APIKeyName is snapshotted on each row so revoked keys keep showing their name in history. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): backfill Source on pre-feature usage rows InitDB now classifies any pre-existing usage_record with an empty source: 'legacy-api-key' user -> legacy, everything else -> web. The backfill is idempotent (only touches NULL/empty rows). Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): add GetUserUsageBySource aggregator Groups by (bucket, source, api_key_id, api_key_name). Filters out legacy by default. Returns both per-bucket detail and roll-ups (by_source, by_key sorted desc and capped at 200, grand_total). The MAX(created_at) projection is iterated via Rows().Scan into a string column and parsed manually because the SQLite driver surfaces the aggregated timestamp as a string, which database/sql refuses to scan directly into time.Time. Postgres returns a real timestamp; the same string path handles its RFC3339 form too. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(usage): log Rows() errors and assert LastUsed in tests Adds rows.Err() and Rows() open-failure logging in computeSourceTotals so silent data drops surface in logs. Logs on parseLastUsedString format misses for the same reason. Strengthens the snapshot-survival test to assert LastUsed is a recent timestamp, locking the SQLite time-string parser behaviour. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): add admin GetAllUsageBySource with filters and truncation Optional user_id and api_key_id filters (composed with AND). Legacy bucket is included for admin callers. 
truncated=true when more than 200 distinct keys would be in the by_key roll-up. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(auth): plumb auth_source and auth_apikey through Echo context tryAuthenticate now sets auth_source on every successful branch (web for session/Bearer-session, apikey for Bearer-key/x-api-key/ token-cookie, legacy for legacy env key match). For named-key branches it also stores the resolved *UserAPIKey under auth_apikey so downstream middlewares can snapshot id+name without re-validating. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(auth): expand tryAuthenticate godoc and cover Bearer-session branch Documents all three context-keys side effects (auth_source, auth_apikey, _auth_session) plus the split of responsibilities with the parent Middleware. Adds a test for the Bearer-as-session-token classification so future regressions there fail loudly. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): UsageMiddleware records source + snapshots key name Reads auth_source and auth_apikey from the Echo context (set by auth.Middleware in the previous task). Snapshots UserAPIKey.ID and Name onto each row so revoked keys remain readable in history. Falls back to source=web when no auth_source is set (auth disabled or unrecognised path). Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): add /api/auth/usage/sources and admin variant Self endpoint filters legacy server-side; admin endpoint includes legacy and accepts user_id + api_key_id filters. Response includes buckets, totals.{by_source, by_key, grand_total}, and a truncated flag set when the per-key roll-up was capped at 200. 
Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs(routes): mark test mirror handlers as keep-in-sync with production The newTestAuthApp helper duplicates production route handlers inline because it cannot use RegisterAuthRoutes (which requires a *application.Application). Naming the source path on each mirror makes the drift contract explicit for future maintainers. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(ui): add usageApi.getMySources/getAdminSources + i18n strings Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(ui): add Sources tab skeleton with data fetch Adds Usage page tab that fetches /api/auth/usage/sources (or the admin variant). Renders raw totals plus a placeholder key list; real visualisations land in subsequent commits. Restructures the existing tab button block so Models and Sources are visible to non-admins (Users remains admin-only). Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(ui): source mix ribbon + searchable/sortable sources table Replaces the SourcesTab placeholder rendering with two reusable components: SourceMixRibbon (one segmented bar per source class) and SourcesTable (search + sort + revoked-key dim). Pulls the current API key list to detect revoked keys. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(ui): skip revoked-key detection until the key list is known existingKeyIds defaulted to an empty Set, which made every live api_key row render as (revoked) during the brief window before apiKeysApi.list() resolved, and permanently after a fetch failure. Use null as the unknown state and suppress the revoked badge until the parent provides a real Set. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(ui): top-N stacked time chart and drill-in chip for Sources tab Top 7 sources by total tokens get distinct colours; the rest roll up into 'Other'. 
Clicking a row in the SourcesTable dims everything except that series in the chart; the chip is the canonical clear. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * docs(usage): document per-API-key Sources tab and endpoints Extends features/authentication.md Usage Tracking section with: - A 'Sources' tab description and source-class taxonomy - Endpoint documentation for /api/auth/usage/sources and the admin variant - Response shape example with by_source / by_key / grand_total - Migration note about pre-feature row backfill Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(usage): silence errcheck on deferred rows.Close CI errcheck flagged the bare 'defer rows.Close()' in computeSourceTotals. Wrap in a closure that discards the close error explicitly; an error here is non-actionable since we have already drained the rows and logged any iteration failure. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactor(usage): bound batcher intake and add Shutdown/FlushNow hooks The pre-existing usage batcher had no cap on its add() path; the usageMaxPending=5000 constant only guarded the re-queue path after a failed write, leaving memory growth unbounded if the DB fell behind. This commit: - Adds the cap to add() so saturation drops new records (rate-limited warn at 1/1024) instead of growing unbounded. - Raises usageMaxPending to 50000 to absorb realistic inference bursts. - Replaces the package-level batcher global with a mutex-guarded pair plus a currentBatcher() accessor so Init / Shutdown cycles are race-free. - Adds ShutdownUsageRecorder() for graceful drain on process exit (not yet wired into app shutdown, just published). - Adds FlushNow() for deterministic tests; the middleware suite no longer needs 6s sleeps per spec and now runs in ~50ms instead of 18s. 
- Re-queue on failed flush is now cap-aware: prepends as much of the failed batch as fits alongside concurrent arrivals, instead of dropping the whole batch when full. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat(usage): drain usage batcher on graceful shutdown Registers ShutdownUsageRecorder with the existing signals.RegisterGracefulTerminationHandler so SIGINT/SIGTERM synchronously flushes any in-memory usage records before the process exits. Without this, up to one flush interval (5s) of recorded usage was lost when LocalAI restarted. Refs: #9862 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
292 lines
6.9 KiB
Go
292 lines
6.9 KiB
Go
package middleware
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/http/auth"
|
|
"github.com/mudler/xlog"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
// usageFlushInterval is the period of the background ticker that triggers a
// flush of queued usage records.
const usageFlushInterval = 5 * time.Second

// usageMaxPending is the upper bound on the in-memory queue length. It is
// sized for bursty inference traffic on a self-hosted instance whose DB is
// slow or unavailable.
const usageMaxPending = 50000
|
|
|
|
// usageBatcher accumulates usage records and flushes them to the DB periodically.
|
|
type usageBatcher struct {
|
|
mu sync.Mutex
|
|
pending []*auth.UsageRecord
|
|
db *gorm.DB
|
|
stop chan struct{}
|
|
done chan struct{}
|
|
stopOnce sync.Once
|
|
}
|
|
|
|
// droppedRecords is the running total of usage records discarded because the
// in-memory queue was at capacity. The counter is what lets the warning log be
// rate-limited, so a sustained outage doesn't flood it.
var (
	droppedRecords atomic.Uint64
)
|
|
|
|
func (b *usageBatcher) add(r *auth.UsageRecord) {
|
|
b.mu.Lock()
|
|
if len(b.pending) >= usageMaxPending {
|
|
b.mu.Unlock()
|
|
// Rate-limit: one warn per 1024 drops keeps the log readable.
|
|
n := droppedRecords.Add(1)
|
|
if n&1023 == 1 {
|
|
xlog.Warn("usage batcher full, dropping record",
|
|
"cap", usageMaxPending, "total_dropped", n)
|
|
}
|
|
return
|
|
}
|
|
b.pending = append(b.pending, r)
|
|
b.mu.Unlock()
|
|
}
|
|
|
|
func (b *usageBatcher) flush() {
|
|
b.mu.Lock()
|
|
batch := b.pending
|
|
b.pending = nil
|
|
b.mu.Unlock()
|
|
|
|
if len(batch) == 0 {
|
|
return
|
|
}
|
|
|
|
if err := b.db.Create(&batch).Error; err != nil {
|
|
xlog.Error("Failed to flush usage batch", "count", len(batch), "error", err)
|
|
// Cap-aware re-queue: prepend as much of the failed batch as fits
|
|
// alongside any records added concurrently with the failed write.
|
|
b.mu.Lock()
|
|
room := usageMaxPending - len(b.pending)
|
|
if room > 0 {
|
|
if room > len(batch) {
|
|
room = len(batch)
|
|
}
|
|
b.pending = append(batch[:room], b.pending...)
|
|
}
|
|
b.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
func (b *usageBatcher) run() {
|
|
defer close(b.done)
|
|
ticker := time.NewTicker(usageFlushInterval)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
b.flush()
|
|
case <-b.stop:
|
|
b.flush() // final drain
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (b *usageBatcher) shutdown() {
|
|
b.stopOnce.Do(func() {
|
|
close(b.stop)
|
|
<-b.done
|
|
})
|
|
}
|
|
|
|
// The package-level batcher is guarded by batcherMu so Init / Shutdown cycles
|
|
// (the test pattern) don't race against UsageMiddleware reads.
|
|
var (
|
|
batcherMu sync.RWMutex
|
|
batcher *usageBatcher
|
|
)
|
|
|
|
func currentBatcher() *usageBatcher {
|
|
batcherMu.RLock()
|
|
defer batcherMu.RUnlock()
|
|
return batcher
|
|
}
|
|
|
|
// InitUsageRecorder starts a background goroutine that periodically flushes
|
|
// accumulated usage records to the database. Calling it more than once
|
|
// shuts down the previous batcher first so its goroutine doesn't leak.
|
|
func InitUsageRecorder(db *gorm.DB) {
|
|
if db == nil {
|
|
return
|
|
}
|
|
|
|
batcherMu.Lock()
|
|
old := batcher
|
|
batcher = nil
|
|
batcherMu.Unlock()
|
|
if old != nil {
|
|
old.shutdown()
|
|
}
|
|
|
|
b := &usageBatcher{
|
|
db: db,
|
|
stop: make(chan struct{}),
|
|
done: make(chan struct{}),
|
|
}
|
|
batcherMu.Lock()
|
|
batcher = b
|
|
batcherMu.Unlock()
|
|
|
|
go b.run()
|
|
}
|
|
|
|
// ShutdownUsageRecorder stops the background flusher and synchronously drains
|
|
// pending records once. Safe to call multiple times. Not yet wired into the
|
|
// application lifecycle; intended for graceful process exit and tests.
|
|
func ShutdownUsageRecorder() {
|
|
batcherMu.Lock()
|
|
b := batcher
|
|
batcher = nil
|
|
batcherMu.Unlock()
|
|
if b != nil {
|
|
b.shutdown()
|
|
}
|
|
}
|
|
|
|
// FlushNow synchronously flushes any pending usage records. Intended for tests
|
|
// that need deterministic behaviour without waiting for the ticker.
|
|
func FlushNow() {
|
|
if b := currentBatcher(); b != nil {
|
|
b.flush()
|
|
}
|
|
}
|
|
|
|
// usageResponseBody holds only the response fields that usage accounting
// reads: the model name and the token counters. Every other key in the
// response JSON is ignored when decoding.
type usageResponseBody struct {
	// Model is the value of the top-level "model" key.
	Model string `json:"model"`

	// Usage stays nil when the response carries no "usage" object.
	Usage *struct {
		PromptTokens     int64 `json:"prompt_tokens"`
		CompletionTokens int64 `json:"completion_tokens"`
		TotalTokens      int64 `json:"total_tokens"`
	} `json:"usage"`
}
|
|
|
|
// UsageMiddleware extracts token usage from OpenAI-compatible response JSON
|
|
// and records it per-user.
|
|
func UsageMiddleware(db *gorm.DB) echo.MiddlewareFunc {
|
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
b := currentBatcher()
|
|
if db == nil || b == nil {
|
|
return next(c)
|
|
}
|
|
|
|
startTime := time.Now()
|
|
|
|
// Wrap response writer to capture body
|
|
resBody := new(bytes.Buffer)
|
|
origWriter := c.Response().Writer
|
|
mw := &bodyWriter{
|
|
ResponseWriter: origWriter,
|
|
body: resBody,
|
|
}
|
|
c.Response().Writer = mw
|
|
|
|
handlerErr := next(c)
|
|
|
|
// Restore original writer
|
|
c.Response().Writer = origWriter
|
|
|
|
// Only record on successful responses
|
|
if c.Response().Status < 200 || c.Response().Status >= 300 {
|
|
return handlerErr
|
|
}
|
|
|
|
// Get authenticated user
|
|
user := auth.GetUser(c)
|
|
if user == nil {
|
|
return handlerErr
|
|
}
|
|
|
|
// Try to parse usage from response
|
|
responseBytes := resBody.Bytes()
|
|
if len(responseBytes) == 0 {
|
|
return handlerErr
|
|
}
|
|
|
|
// Check content type
|
|
ct := c.Response().Header().Get("Content-Type")
|
|
isJSON := ct == "" || ct == "application/json" || bytes.HasPrefix([]byte(ct), []byte("application/json"))
|
|
isSSE := bytes.HasPrefix([]byte(ct), []byte("text/event-stream"))
|
|
|
|
if !isJSON && !isSSE {
|
|
return handlerErr
|
|
}
|
|
|
|
var resp usageResponseBody
|
|
if isSSE {
|
|
last, ok := lastSSEData(responseBytes)
|
|
if !ok {
|
|
return handlerErr
|
|
}
|
|
if err := json.Unmarshal(last, &resp); err != nil {
|
|
return handlerErr
|
|
}
|
|
} else {
|
|
if err := json.Unmarshal(responseBytes, &resp); err != nil {
|
|
return handlerErr
|
|
}
|
|
}
|
|
|
|
if resp.Usage == nil {
|
|
return handlerErr
|
|
}
|
|
|
|
source := auth.GetSource(c)
|
|
if source == "" {
|
|
// Auth disabled or unrecognised path: classify as web so the row is still
|
|
// bucketable rather than silently dropped from per-source aggregates.
|
|
source = auth.UsageSourceWeb
|
|
}
|
|
|
|
record := &auth.UsageRecord{
|
|
UserID: user.ID,
|
|
UserName: user.Name,
|
|
Source: source,
|
|
Model: resp.Model,
|
|
Endpoint: c.Request().URL.Path,
|
|
PromptTokens: resp.Usage.PromptTokens,
|
|
CompletionTokens: resp.Usage.CompletionTokens,
|
|
TotalTokens: resp.Usage.TotalTokens,
|
|
Duration: time.Since(startTime).Milliseconds(),
|
|
CreatedAt: startTime,
|
|
}
|
|
|
|
if key := auth.GetAPIKey(c); key != nil {
|
|
id := key.ID
|
|
record.APIKeyID = &id
|
|
record.APIKeyName = key.Name
|
|
}
|
|
|
|
b.add(record)
|
|
|
|
return handlerErr
|
|
}
|
|
}
|
|
}
|
|
|
|
// lastSSEData scans an SSE body and returns the payload of the last "data"
// line whose content is not "[DONE]". The boolean is false when no such line
// exists.
//
// Lines are split on "\n" and any trailing "\r" is trimmed, so both LF and
// CRLF streams are handled. The SSE format makes the single space after the
// colon optional, so "data: {...}" and "data:{...}" are both recognised. The
// returned slice aliases b.
func lastSSEData(b []byte) ([]byte, bool) {
	field := []byte("data:")
	done := []byte("[DONE]")

	var last []byte
	for _, line := range bytes.Split(b, []byte("\n")) {
		line = bytes.TrimRight(line, "\r")
		if !bytes.HasPrefix(line, field) {
			continue
		}

		payload := line[len(field):]
		// Strip at most one leading space: it separates the field name from
		// the value and is not part of the payload.
		if len(payload) > 0 && payload[0] == ' ' {
			payload = payload[1:]
		}

		// The terminator sentinel carries no data; keep the previous payload.
		if bytes.Equal(payload, done) {
			continue
		}
		last = payload
	}
	return last, last != nil
}
|