Compare commits

...

11 Commits

Author SHA1 Message Date
dependabot[bot]
fdc9f7bf35 chore(deps): bump go.opentelemetry.io/otel/exporters/prometheus from 0.64.0 to 0.65.0 (#9254)
chore(deps): bump go.opentelemetry.io/otel/exporters/prometheus

Bumps [go.opentelemetry.io/otel/exporters/prometheus](https://github.com/open-telemetry/opentelemetry-go) from 0.64.0 to 0.65.0.
- [Release notes](https://github.com/open-telemetry/opentelemetry-go/releases)
- [Changelog](https://github.com/open-telemetry/opentelemetry-go/blob/main/CHANGELOG.md)
- [Commits](https://github.com/open-telemetry/opentelemetry-go/compare/exporters/prometheus/v0.64.0...exporters/prometheus/v0.65.0)

---
updated-dependencies:
- dependency-name: go.opentelemetry.io/otel/exporters/prometheus
  dependency-version: 0.65.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-07 00:39:52 +02:00
LocalAI [bot]
8e59346091 chore: ⬆️ Update leejet/stable-diffusion.cpp to 8afbeb6ba9702c15d41a38296f2ab1fe5c829fa0 (#9262)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-04-07 00:39:38 +02:00
LocalAI [bot]
e6e4e19633 chore: ⬆️ Update ace-step/acestep.cpp to e0c8d75a672fca5684c88c68dbf6d12f58754258 (#9261)
⬆️ Update ace-step/acestep.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-04-07 00:39:24 +02:00
Ettore Di Giacinto
505c417fa7 fix(gpu): better detection for MacOS and Thor (#9263)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-07 00:39:07 +02:00
LocalAI [bot]
17215f6fbc docs: ⬆️ update docs version mudler/LocalAI (#9260)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-04-07 00:38:50 +02:00
LocalAI [bot]
bccaba1f66 chore: ⬆️ Update ggml-org/llama.cpp to d0a6dfeb28a09831d904fc4d910ddb740da82834 (#9259)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-04-07 00:38:36 +02:00
Ettore Di Giacinto
0f9d516a6c fix(anthropic): do not emit empty tokens and fix SSE tool calls (#9258)
This fixes Claude Code compatibility

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-07 00:38:21 +02:00
dependabot[bot]
33b124c6f1 chore(deps): bump github.com/aws/aws-sdk-go-v2/config from 1.32.12 to 1.32.14 (#9256)
chore(deps): bump github.com/aws/aws-sdk-go-v2/config

Bumps [github.com/aws/aws-sdk-go-v2/config](https://github.com/aws/aws-sdk-go-v2) from 1.32.12 to 1.32.14.
- [Release notes](https://github.com/aws/aws-sdk-go-v2/releases)
- [Commits](https://github.com/aws/aws-sdk-go-v2/compare/config/v1.32.12...config/v1.32.14)

---
updated-dependencies:
- dependency-name: github.com/aws/aws-sdk-go-v2/config
  dependency-version: 1.32.14
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-06 21:46:52 +02:00
dependabot[bot]
6b8007e88e chore(deps): bump github.com/jaypipes/ghw from 0.23.0 to 0.24.0 (#9250)
Bumps [github.com/jaypipes/ghw](https://github.com/jaypipes/ghw) from 0.23.0 to 0.24.0.
- [Release notes](https://github.com/jaypipes/ghw/releases)
- [Commits](https://github.com/jaypipes/ghw/compare/v0.23.0...v0.24.0)

---
updated-dependencies:
- dependency-name: github.com/jaypipes/ghw
  dependency-version: 0.24.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-06 21:46:18 +02:00
dependabot[bot]
b3837c2078 chore(deps): bump google.golang.org/grpc from 1.79.3 to 1.80.0 (#9253)
Bumps [google.golang.org/grpc](https://github.com/grpc/grpc-go) from 1.79.3 to 1.80.0.
- [Release notes](https://github.com/grpc/grpc-go/releases)
- [Commits](https://github.com/grpc/grpc-go/compare/v1.79.3...v1.80.0)

---
updated-dependencies:
- dependency-name: google.golang.org/grpc
  dependency-version: 1.80.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-06 21:45:50 +02:00
Ettore Di Giacinto
92f99b1ec3 fix(token): login via legacy api keys (#9249)
We were not checking against the api keys when db == nil.

This commit also cleanups now unused middleware

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-04-06 21:45:09 +02:00
14 changed files with 578 additions and 568 deletions

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=761797ffdf2ce3f118e82c663b1ad7d935fbd656
LLAMA_VERSION?=d0a6dfeb28a09831d904fc4d910ddb740da82834
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# acestep.cpp version
ACESTEP_REPO?=https://github.com/ace-step/acestep.cpp
ACESTEP_CPP_VERSION?=6f35c874ee11e86d511b860019b84976f5b52d3a
ACESTEP_CPP_VERSION?=e0c8d75a672fca5684c88c68dbf6d12f58754258
SO_TARGET?=libgoacestepcpp.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=7397ddaa86f4e8837d5261724678cde0f36d4d89
STABLEDIFFUSION_GGML_VERSION?=8afbeb6ba9702c15d41a38296f2ab1fe5c829fa0
CMAKE_ARGS+=-DGGML_MAX_NAME=128

View File

@@ -3,6 +3,8 @@ package anthropic
import (
"encoding/json"
"fmt"
"sync"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v4"
@@ -366,7 +368,33 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
// Collect tool calls for MCP execution
var collectedToolCalls []functions.FuncCallResults
// SSE keepalive: send comment pings every 3s until the first token arrives.
// This prevents clients (e.g. Claude Code) from timing out while the model loads or processes the prompt.
firstTokenReceived := make(chan struct{})
keepaliveDone := make(chan struct{})
go func() {
defer close(keepaliveDone)
ticker := time.NewTicker(3 * time.Second)
defer ticker.Stop()
for {
select {
case <-firstTokenReceived:
return
case <-c.Request().Context().Done():
return
case <-ticker.C:
fmt.Fprintf(c.Response().Writer, "event: ping\ndata: {\"type\": \"ping\"}\n\n")
c.Response().Flush()
}
}
}()
firstTokenOnce := sync.Once{}
tokenCallback := func(token string, usage backend.TokenUsage) bool {
firstTokenOnce.Do(func() {
close(firstTokenReceived)
<-keepaliveDone // wait for keepalive goroutine to exit before writing
})
accumulatedContent += token
if shouldUseFn {
@@ -414,7 +442,7 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
}
}
if !inToolCall {
if !inToolCall && token != "" {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(0),
@@ -433,6 +461,11 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
openAIReq.Metadata = input.Metadata
_, tokenUsage, chatDeltas, err := openaiEndpoint.ComputeChoices(openAIReq, predInput, cfg, cl, appConfig, ml, func(s string, c *[]schema.Choice) {}, tokenCallback)
// Stop the keepalive goroutine now that inference is done
firstTokenOnce.Do(func() { close(firstTokenReceived) })
<-keepaliveDone
if err != nil {
xlog.Error("Anthropic stream model inference failed", "error", err)
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
@@ -445,9 +478,68 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq
return nil
}
// Also check chat deltas for tool calls
if deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas); len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
collectedToolCalls = deltaToolCalls
// Check chat deltas from C++ autoparser — when active, the raw
// message is cleared and content/tool calls arrive via ChatDeltas.
if len(chatDeltas) > 0 {
deltaContent := functions.ContentFromChatDeltas(chatDeltas)
deltaToolCalls := functions.ToolCallsFromChatDeltas(chatDeltas)
// Emit text content from ChatDeltas only when the tokenCallback
// didn't already stream it (autoparser clears raw text, so
// accumulatedContent will be empty in that case).
if deltaContent != "" && !inToolCall && accumulatedContent == "" {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(0),
Delta: &schema.AnthropicStreamDelta{
Type: "text_delta",
Text: deltaContent,
},
})
}
// Emit tool_use blocks from ChatDeltas
if len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 {
collectedToolCalls = deltaToolCalls
if !inToolCall && currentBlockIndex == 0 {
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
})
currentBlockIndex++
inToolCall = true
}
for i, tc := range deltaToolCalls {
toolCallID := tc.ID
if toolCallID == "" {
toolCallID = fmt.Sprintf("toolu_%s_%d", id, i)
}
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_start",
Index: intPtr(currentBlockIndex),
ContentBlock: &schema.AnthropicContentBlock{
Type: "tool_use",
ID: toolCallID,
Name: tc.Name,
},
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_delta",
Index: intPtr(currentBlockIndex),
Delta: &schema.AnthropicStreamDelta{
Type: "input_json_delta",
PartialJSON: tc.Arguments,
},
})
sendAnthropicSSE(c, schema.AnthropicStreamEvent{
Type: "content_block_stop",
Index: intPtr(currentBlockIndex),
})
currentBlockIndex++
toolCallsEmitted++
}
}
}
// MCP streaming tool execution: if we collected MCP tool calls, execute and loop

View File

@@ -1,179 +0,0 @@
package middleware
import (
"crypto/subtle"
"errors"
"net/http"
"strings"
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
)
var ErrMissingOrMalformedAPIKey = errors.New("missing or malformed API Key")
// GetKeyAuthConfig returns Echo's KeyAuth middleware configuration
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (echo.MiddlewareFunc, error) {
// Create validator function
validator := getApiKeyValidationFunction(applicationConfig)
// Create error handler
errorHandler := getApiKeyErrorHandler(applicationConfig)
// Create Next function (skip middleware for certain requests)
skipper := getApiKeyRequiredFilterFunction(applicationConfig)
// Wrap it with our custom key lookup that checks multiple sources
return func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
if len(applicationConfig.ApiKeys) == 0 {
return next(c)
}
// Skip if skipper says so
if skipper != nil && skipper(c) {
return next(c)
}
// Try to extract key from multiple sources
key, err := extractKeyFromMultipleSources(c)
if err != nil {
return errorHandler(err, c)
}
// Validate the key
valid, err := validator(key, c)
if err != nil || !valid {
return errorHandler(ErrMissingOrMalformedAPIKey, c)
}
// Store key in context for later use
c.Set("api_key", key)
return next(c)
}
}, nil
}
// extractKeyFromMultipleSources checks multiple sources for the API key
// in order: Authorization header, x-api-key header, xi-api-key header, token cookie
func extractKeyFromMultipleSources(c echo.Context) (string, error) {
// Check Authorization header first
auth := c.Request().Header.Get("Authorization")
if auth != "" {
// Check for Bearer scheme
if strings.HasPrefix(auth, "Bearer ") {
return strings.TrimPrefix(auth, "Bearer "), nil
}
// If no Bearer prefix, return as-is (for backward compatibility)
return auth, nil
}
// Check x-api-key header
if key := c.Request().Header.Get("x-api-key"); key != "" {
return key, nil
}
// Check xi-api-key header
if key := c.Request().Header.Get("xi-api-key"); key != "" {
return key, nil
}
// Check token cookie
cookie, err := c.Cookie("token")
if err == nil && cookie != nil && cookie.Value != "" {
return cookie.Value, nil
}
return "", ErrMissingOrMalformedAPIKey
}
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) func(error, echo.Context) error {
return func(err error, c echo.Context) error {
if errors.Is(err, ErrMissingOrMalformedAPIKey) {
if len(applicationConfig.ApiKeys) == 0 {
return nil // if no keys are set up, any error we get here is not an error.
}
c.Response().Header().Set("WWW-Authenticate", "Bearer")
if applicationConfig.OpaqueErrors {
return c.NoContent(http.StatusUnauthorized)
}
// Check if the request content type is JSON
contentType := c.Request().Header.Get("Content-Type")
if strings.Contains(contentType, "application/json") {
return c.JSON(http.StatusUnauthorized, schema.ErrorResponse{
Error: &schema.APIError{
Message: "An authentication key is required",
Code: 401,
Type: "invalid_request_error",
},
})
}
return c.Render(http.StatusUnauthorized, "views/login", map[string]any{
"BaseURL": BaseURL(c),
})
}
if applicationConfig.OpaqueErrors {
return c.NoContent(http.StatusInternalServerError)
}
return err
}
}
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(string, echo.Context) (bool, error) {
if applicationConfig.UseSubtleKeyComparison {
return func(key string, c echo.Context) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if subtle.ConstantTimeCompare([]byte(key), []byte(validKey)) == 1 {
return true, nil
}
}
return false, ErrMissingOrMalformedAPIKey
}
}
return func(key string, c echo.Context) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if key == validKey {
return true, nil
}
}
return false, ErrMissingOrMalformedAPIKey
}
}
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) middleware.Skipper {
return func(c echo.Context) bool {
path := c.Request().URL.Path
for _, p := range applicationConfig.PathWithoutAuth {
if strings.HasPrefix(path, p) {
return true
}
}
// Handle GET request exemptions if enabled
if applicationConfig.DisableApiKeyRequirementForHttpGet {
if c.Request().Method != http.MethodGet {
return false
}
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
if rx.MatchString(c.Path()) {
return true
}
}
}
return false
}
}

View File

@@ -1,228 +0,0 @@
package middleware_test
import (
"net/http"
"net/http/httptest"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http/middleware"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// ok is a simple handler that returns 200 OK.
func ok(c echo.Context) error {
return c.String(http.StatusOK, "ok")
}
// newAuthApp creates a minimal Echo app with auth middleware applied.
// Requests that fail auth with Content-Type: application/json get a JSON 401
// (no template renderer needed).
func newAuthApp(appConfig *config.ApplicationConfig) *echo.Echo {
e := echo.New()
mw, err := GetKeyAuthConfig(appConfig)
Expect(err).ToNot(HaveOccurred())
e.Use(mw)
// Sensitive API routes
e.GET("/v1/models", ok)
e.POST("/v1/chat/completions", ok)
// UI routes
e.GET("/app", ok)
e.GET("/app/*", ok)
e.GET("/browse", ok)
e.GET("/browse/*", ok)
e.GET("/login", ok)
e.GET("/explorer", ok)
e.GET("/assets/*", ok)
e.POST("/app", ok)
return e
}
// doRequest performs an HTTP request against the given Echo app and returns the recorder.
func doRequest(e *echo.Echo, method, path string, opts ...func(*http.Request)) *httptest.ResponseRecorder {
req := httptest.NewRequest(method, path, nil)
req.Header.Set("Content-Type", "application/json")
for _, opt := range opts {
opt(req)
}
rec := httptest.NewRecorder()
e.ServeHTTP(rec, req)
return rec
}
func withBearerToken(token string) func(*http.Request) {
return func(req *http.Request) {
req.Header.Set("Authorization", "Bearer "+token)
}
}
func withXApiKey(key string) func(*http.Request) {
return func(req *http.Request) {
req.Header.Set("x-api-key", key)
}
}
func withXiApiKey(key string) func(*http.Request) {
return func(req *http.Request) {
req.Header.Set("xi-api-key", key)
}
}
func withTokenCookie(token string) func(*http.Request) {
return func(req *http.Request) {
req.AddCookie(&http.Cookie{Name: "token", Value: token})
}
}
var _ = Describe("Auth Middleware", func() {
Context("when API keys are configured", func() {
var app *echo.Echo
const validKey = "sk-test-key-123"
BeforeEach(func() {
appConfig := config.NewApplicationConfig()
appConfig.ApiKeys = []string{validKey}
app = newAuthApp(appConfig)
})
It("returns 401 for GET request without a key", func() {
rec := doRequest(app, http.MethodGet, "/v1/models")
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
It("returns 401 for POST request without a key", func() {
rec := doRequest(app, http.MethodPost, "/v1/chat/completions")
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
It("returns 401 for request with an invalid key", func() {
rec := doRequest(app, http.MethodGet, "/v1/models", withBearerToken("wrong-key"))
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
It("passes through with valid Bearer token in Authorization header", func() {
rec := doRequest(app, http.MethodGet, "/v1/models", withBearerToken(validKey))
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("passes through with valid x-api-key header", func() {
rec := doRequest(app, http.MethodGet, "/v1/models", withXApiKey(validKey))
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("passes through with valid xi-api-key header", func() {
rec := doRequest(app, http.MethodGet, "/v1/models", withXiApiKey(validKey))
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("passes through with valid token cookie", func() {
rec := doRequest(app, http.MethodGet, "/v1/models", withTokenCookie(validKey))
Expect(rec.Code).To(Equal(http.StatusOK))
})
})
Context("when no API keys are configured", func() {
var app *echo.Echo
BeforeEach(func() {
appConfig := config.NewApplicationConfig()
app = newAuthApp(appConfig)
})
It("passes through without any key", func() {
rec := doRequest(app, http.MethodGet, "/v1/models")
Expect(rec.Code).To(Equal(http.StatusOK))
})
})
Context("GET exempted endpoints (feature enabled)", func() {
var app *echo.Echo
const validKey = "sk-test-key-456"
BeforeEach(func() {
appConfig := config.NewApplicationConfig(
config.WithApiKeys([]string{validKey}),
config.WithDisableApiKeyRequirementForHttpGet(true),
config.WithHttpGetExemptedEndpoints([]string{
"^/$",
"^/app(/.*)?$",
"^/browse(/.*)?$",
"^/login/?$",
"^/explorer/?$",
"^/assets/.*$",
"^/static/.*$",
"^/swagger.*$",
}),
)
app = newAuthApp(appConfig)
})
It("allows GET to /app without a key", func() {
rec := doRequest(app, http.MethodGet, "/app")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("allows GET to /app/chat/model sub-route without a key", func() {
rec := doRequest(app, http.MethodGet, "/app/chat/llama3")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("allows GET to /browse/models without a key", func() {
rec := doRequest(app, http.MethodGet, "/browse/models")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("allows GET to /login without a key", func() {
rec := doRequest(app, http.MethodGet, "/login")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("allows GET to /explorer without a key", func() {
rec := doRequest(app, http.MethodGet, "/explorer")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("allows GET to /assets/main.js without a key", func() {
rec := doRequest(app, http.MethodGet, "/assets/main.js")
Expect(rec.Code).To(Equal(http.StatusOK))
})
It("rejects POST to /app without a key", func() {
rec := doRequest(app, http.MethodPost, "/app")
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
It("rejects GET to /v1/models without a key", func() {
rec := doRequest(app, http.MethodGet, "/v1/models")
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
})
Context("GET exempted endpoints (feature disabled)", func() {
var app *echo.Echo
const validKey = "sk-test-key-789"
BeforeEach(func() {
appConfig := config.NewApplicationConfig(
config.WithApiKeys([]string{validKey}),
// DisableApiKeyRequirementForHttpGet defaults to false
config.WithHttpGetExemptedEndpoints([]string{
"^/$",
"^/app(/.*)?$",
}),
)
app = newAuthApp(appConfig)
})
It("requires auth for GET to /app even though it matches exempted pattern", func() {
rec := doRequest(app, http.MethodGet, "/app")
Expect(rec.Code).To(Equal(http.StatusUnauthorized))
})
})
})

View File

@@ -42,5 +42,6 @@ export function vendorColor(vendor) {
if (v.includes('nvidia')) return '#76b900'
if (v.includes('amd')) return '#ed1c24'
if (v.includes('intel')) return '#0071c5'
if (v.includes('apple')) return '#a2aaad'
return 'var(--color-accent)'
}

View File

@@ -157,11 +157,11 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
}
resp := map[string]any{
"authEnabled": authEnabled,
"staticApiKeyRequired": !authEnabled && len(appConfig.ApiKeys) > 0,
"providers": providers,
"hasUsers": hasUsers,
"registrationMode": registrationMode,
"authEnabled": authEnabled,
"staticApiKeyRequired": !authEnabled && len(appConfig.ApiKeys) > 0,
"providers": providers,
"hasUsers": hasUsers,
"registrationMode": registrationMode,
}
// Include current user if authenticated
@@ -186,7 +186,73 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
return c.JSON(http.StatusOK, resp)
})
// OAuth routes - only registered when auth is enabled
// Rate limiter for auth endpoints: 5 attempts per minute per IP
authRL := newRateLimiter(1*time.Minute, 5)
authRateLimitMw := rateLimitMiddleware(authRL)
// Start background goroutine to periodically prune stale IP entries
go func() {
ticker := time.NewTicker(10 * time.Minute)
defer ticker.Stop()
for {
select {
case <-appConfig.Context.Done():
return
case <-ticker.C:
authRL.cleanup()
}
}
}()
// POST /api/auth/token-login - authenticate with API key/token.
// Registered when auth DB or legacy API keys are configured.
if db != nil || len(appConfig.ApiKeys) > 0 {
e.POST("/api/auth/token-login", func(c echo.Context) error {
var body struct {
Token string `json:"token"`
}
if err := c.Bind(&body); err != nil || strings.TrimSpace(body.Token) == "" {
return c.JSON(http.StatusBadRequest, map[string]string{"error": "token is required"})
}
token := strings.TrimSpace(body.Token)
// Try as user API key (only when auth DB is available)
if db != nil {
if apiKey, err := auth.ValidateAPIKey(db, token, appConfig.Auth.APIKeyHMACSecret); err == nil {
sessionID, err := auth.CreateSession(db, apiKey.User.ID, appConfig.Auth.APIKeyHMACSecret)
if err != nil {
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "failed to create session"})
}
auth.SetSessionCookie(c, sessionID)
return c.JSON(http.StatusOK, map[string]any{
"user": map[string]any{
"id": apiKey.User.ID,
"email": apiKey.User.Email,
"name": apiKey.User.Name,
"role": apiKey.User.Role,
},
})
}
}
// Try as legacy API key
if len(appConfig.ApiKeys) > 0 && isValidLegacyKey(token, appConfig) {
auth.SetTokenCookie(c, token)
return c.JSON(http.StatusOK, map[string]any{
"user": map[string]any{
"id": "legacy-api-key",
"name": "API Key User",
"role": auth.RoleAdmin,
},
})
}
return c.JSON(http.StatusUnauthorized, map[string]string{"error": "invalid token"})
}, authRateLimitMw)
}
// Remaining routes require auth DB
if db == nil {
return
}
@@ -219,24 +285,6 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
}
}
// Rate limiter for auth endpoints: 5 attempts per minute per IP
authRL := newRateLimiter(1*time.Minute, 5)
authRateLimitMw := rateLimitMiddleware(authRL)
// Start background goroutine to periodically prune stale IP entries (#12)
go func() {
ticker := time.NewTicker(10 * time.Minute)
defer ticker.Stop()
for {
select {
case <-appConfig.Context.Done():
return
case <-ticker.C:
authRL.cleanup()
}
}
}()
// POST /api/auth/register - public, email/password registration
e.POST("/api/auth/register", func(c echo.Context) error {
if appConfig.Auth.DisableLocalAuth {
@@ -427,53 +475,6 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
})
}, authRateLimitMw)
// POST /api/auth/token-login - public, authenticate with API key/token (#3)
e.POST("/api/auth/token-login", func(c echo.Context) error {
var body struct {
Token string `json:"token"`
}
if err := c.Bind(&body); err != nil || strings.TrimSpace(body.Token) == "" {
return c.JSON(http.StatusBadRequest, map[string]string{"error": "token is required"})
}
token := strings.TrimSpace(body.Token)
hmacSecret := appConfig.Auth.APIKeyHMACSecret
// Try as user API key
if apiKey, err := auth.ValidateAPIKey(db, token, hmacSecret); err == nil {
sessionID, err := auth.CreateSession(db, apiKey.User.ID, appConfig.Auth.APIKeyHMACSecret)
if err != nil {
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "failed to create session"})
}
auth.SetSessionCookie(c, sessionID)
return c.JSON(http.StatusOK, map[string]any{
"user": map[string]any{
"id": apiKey.User.ID,
"email": apiKey.User.Email,
"name": apiKey.User.Name,
"role": apiKey.User.Role,
},
})
}
// Try as legacy API key
if len(appConfig.ApiKeys) > 0 && isValidLegacyKey(token, appConfig) {
// Create a synthetic session cookie with the token for legacy mode
auth.SetTokenCookie(c, token)
return c.JSON(http.StatusOK, map[string]any{
"user": map[string]any{
"id": "legacy-api-key",
"name": "API Key User",
"role": auth.RoleAdmin,
},
})
}
return c.JSON(http.StatusUnauthorized, map[string]string{"error": "invalid token"})
}, authRateLimitMw)
// POST /api/auth/logout - requires auth
e.POST("/api/auth/logout", func(c echo.Context) error {
user := auth.GetUser(c)

View File

@@ -166,6 +166,67 @@ This section provides step-by-step instructions for configuring specific softwar
After saving the configuration file, restart OpenCode for the changes to take effect.
### Claude Code
[Claude Code](https://docs.anthropic.com/en/docs/claude-code) is Anthropic's official CLI tool for coding with Claude. LocalAI implements the Anthropic Messages API (`/v1/messages`), so Claude Code can be pointed directly at a LocalAI instance.
#### Prerequisites
- LocalAI must be running and accessible (either locally or on a network)
- You need to know your LocalAI server's IP address/hostname and port (default is `8080`)
- An API key configured in your LocalAI instance
#### Running Claude Code with LocalAI
Set the `ANTHROPIC_BASE_URL` and `ANTHROPIC_API_KEY` environment variables to point Claude Code at your LocalAI server:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude --model your-model-name
```
For example, if you have a Gemma model loaded:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude --model gemma-4-12B-it-GGUF
```
You can also run a single prompt non-interactively:
```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
ANTHROPIC_API_KEY=your-localai-api-key \
claude -p "list the files in /tmp" --model your-model-name
```
#### Configuration
To avoid setting environment variables every time, you can add them to your shell profile (e.g., `~/.bashrc` or `~/.zshrc`):
```bash
export ANTHROPIC_BASE_URL=http://127.0.0.1:8080
export ANTHROPIC_API_KEY=your-localai-api-key
```
#### Verify available models
Check which models are available in your LocalAI instance:
```bash
curl http://127.0.0.1:8080/v1/models
```
Use one of the listed model IDs as the `--model` argument.
#### Notes
- Models with tool calling support (e.g., Gemma 4, Qwen 3) work best, as Claude Code relies heavily on tool use for file operations and code editing.
- Larger models generally produce better results for complex coding tasks.
- The Anthropic Messages API endpoint supports both streaming and non-streaming modes.
### Charm Crush
You can ask [Charm Crush](https://charm.land/crush) to generate your config by giving it this documentation's URL and your LocalAI instance URL. The configuration will look something like the following and goes in `~/.config/crush/crush.json`:

View File

@@ -1,3 +1,3 @@
{
"version": "v4.1.1"
"version": "v4.1.2"
}

46
go.mod
View File

@@ -8,9 +8,9 @@ require (
github.com/Masterminds/sprig/v3 v3.3.0
github.com/alecthomas/kong v1.14.0
github.com/anthropics/anthropic-sdk-go v1.27.0
github.com/aws/aws-sdk-go-v2 v1.41.4
github.com/aws/aws-sdk-go-v2/config v1.32.12
github.com/aws/aws-sdk-go-v2/credentials v1.19.12
github.com/aws/aws-sdk-go-v2 v1.41.5
github.com/aws/aws-sdk-go-v2/config v1.32.14
github.com/aws/aws-sdk-go-v2/credentials v1.19.14
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1
github.com/charmbracelet/glamour v0.10.0
github.com/containerd/containerd v1.7.30
@@ -27,7 +27,7 @@ require (
github.com/gpustack/gguf-parser-go v0.24.0
github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5
github.com/jaypipes/ghw v0.23.0
github.com/jaypipes/ghw v0.24.0
github.com/joho/godotenv v1.5.1
github.com/klauspost/cpuid/v2 v2.3.0
github.com/labstack/echo/v4 v4.15.1
@@ -39,7 +39,7 @@ require (
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
github.com/mudler/edgevpn v0.31.1
github.com/mudler/go-processmanager v0.1.0
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
github.com/mudler/xlog v0.0.6
github.com/nats-io/nats.go v1.50.0
github.com/onsi/ginkgo/v2 v2.28.1
@@ -60,11 +60,11 @@ require (
github.com/testcontainers/testcontainers-go v0.41.0
github.com/testcontainers/testcontainers-go/modules/nats v0.41.0
github.com/testcontainers/testcontainers-go/modules/postgres v0.41.0
go.opentelemetry.io/otel v1.42.0
go.opentelemetry.io/otel/exporters/prometheus v0.64.0
go.opentelemetry.io/otel/metric v1.42.0
go.opentelemetry.io/otel/sdk/metric v1.42.0
google.golang.org/grpc v1.79.3
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/exporters/prometheus v0.65.0
go.opentelemetry.io/otel/metric v1.43.0
go.opentelemetry.io/otel/sdk/metric v1.43.0
google.golang.org/grpc v1.80.0
google.golang.org/protobuf v1.36.11
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.6.0
@@ -75,19 +75,19 @@ require (
require (
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.21 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20 // indirect
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9 // indirect
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect
github.com/aws/smithy-go v1.24.2 // indirect
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
@@ -413,7 +413,7 @@ require (
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.5 // indirect
github.com/prometheus/procfs v0.19.2 // indirect
github.com/prometheus/procfs v0.20.1 // indirect
github.com/quic-go/qpack v0.5.1 // indirect
github.com/quic-go/quic-go v0.54.1 // indirect
github.com/quic-go/webtransport-go v0.9.0 // indirect
@@ -438,8 +438,8 @@ require (
github.com/yuin/goldmark-emoji v1.0.5 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/sdk v1.42.0 // indirect
go.opentelemetry.io/otel/trace v1.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.43.0 // indirect
go.opentelemetry.io/otel/trace v1.43.0 // indirect
go.uber.org/dig v1.19.0 // indirect
go.uber.org/fx v1.24.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
@@ -455,8 +455,8 @@ require (
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
gonum.org/v1/gonum v0.16.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
gonum.org/v1/gonum v0.17.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
howett.net/plist v1.0.2-0.20250314012144-ee69052608d9 // indirect

94
go.sum
View File

@@ -70,20 +70,20 @@ github.com/anthropics/anthropic-sdk-go v1.27.0 h1:0CWbmBq5ofGAjF2H6lefCNRbnaUMGi
github.com/anthropics/anthropic-sdk-go v1.27.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/aws/aws-sdk-go-v2 v1.41.4 h1:10f50G7WyU02T56ox1wWXq+zTX9I1zxG46HYuG1hH/k=
github.com/aws/aws-sdk-go-v2 v1.41.4/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY=
github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7 h1:3kGOqnh1pPeddVa/E37XNTaWJ8W6vrbYV9lJEkCnhuY=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.7/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI=
github.com/aws/aws-sdk-go-v2/config v1.32.12 h1:O3csC7HUGn2895eNrLytOJQdoL2xyJy0iYXhoZ1OmP0=
github.com/aws/aws-sdk-go-v2/config v1.32.12/go.mod h1:96zTvoOFR4FURjI+/5wY1vc1ABceROO4lWgWJuxgy0g=
github.com/aws/aws-sdk-go-v2/credentials v1.19.12 h1:oqtA6v+y5fZg//tcTWahyN9PEn5eDU/Wpvc2+kJ4aY8=
github.com/aws/aws-sdk-go-v2/credentials v1.19.12/go.mod h1:U3R1RtSHx6NB0DvEQFGyf/0sbrpJrluENHdPy1j/3TE=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20 h1:zOgq3uezl5nznfoK3ODuqbhVg1JzAGDUhXOsU0IDCAo=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.20/go.mod h1:z/MVwUARehy6GAg/yQ1GO2IMl0k++cu1ohP9zo887wE=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20 h1:CNXO7mvgThFGqOFgbNAP2nol2qAWBOGfqR/7tQlvLmc=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.20/go.mod h1:oydPDJKcfMhgfcgBUZaG+toBbwy8yPWubJXBVERtI4o=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20 h1:tN6W/hg+pkM+tf9XDkWUbDEjGLb+raoBMFsTodcoYKw=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.20/go.mod h1:YJ898MhD067hSHA6xYCx5ts/jEd8BSOLtQDL3iZsvbc=
github.com/aws/aws-sdk-go-v2/config v1.32.14 h1:opVIRo/ZbbI8OIqSOKmpFaY7IwfFUOCCXBsUpJOwDdI=
github.com/aws/aws-sdk-go-v2/config v1.32.14/go.mod h1:U4/V0uKxh0Tl5sxmCBZ3AecYny4UNlVmObYjKuuaiOo=
github.com/aws/aws-sdk-go-v2/credentials v1.19.14 h1:n+UcGWAIZHkXzYt87uMFBv/l8THYELoX6gVcUvgl6fI=
github.com/aws/aws-sdk-go-v2/credentials v1.19.14/go.mod h1:cJKuyWB59Mqi0jM3nFYQRmnHVQIcgoxjEMAbLkpr62w=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 h1:NUS3K4BTDArQqNu2ih7yeDLaS3bmHD0YndtA6UP884g=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21/go.mod h1:YWNWJQNjKigKY1RHVJCuupeWDrrHjRqHm0N9rdrWzYI=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 h1:Rgg6wvjjtX8bNHcvi9OnXWwcE0a2vGpbwmtICOsvcf4=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21/go.mod h1:A/kJFst/nm//cyqonihbdpQZwiUhhzpqTsdbhDdRF9c=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 h1:PEgGVtPoB6NTpPrBgqSE5hE/o47Ij9qk/SEZFbUOe9A=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21/go.mod h1:p+hz+PRAYlY3zcpJhPwXlLC4C+kqn70WIHwnzAfs6ps=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 h1:qYQ4pzQ2Oz6WpQ8T3HvGHnZydA72MnLuFK9tJwmrbHw=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.21 h1:SwGMTMLIlvDNyhMteQ6r8IJSBPlRdXX5d4idhIGbkXA=
@@ -92,20 +92,20 @@ github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhL
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12 h1:qtJZ70afD3ISKWnoX3xB0J2otEqu3LqicRcDBqsj0hQ=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.12/go.mod h1:v2pNpJbRNl4vEUWEh5ytQok0zACAKfdmKS51Hotc3pQ=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20 h1:2HvVAIq+YqgGotK6EkMf+KIEqTISmTYh5zLpYyeTo1Y=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.20/go.mod h1:V4X406Y666khGa8ghKmphma/7C0DAtEQYhkq9z4vpbk=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 h1:c31//R3xgIJMSC8S6hEVq+38DcvUlgFY0FM6mSI5oto=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21/go.mod h1:r6+pf23ouCB718FUxaqzZdbpYFyDtehyZcmP5KL9FkA=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20 h1:siU1A6xjUZ2N8zjTHSXFhB9L/2OY8Dqs0xXiLjF30jA=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.20/go.mod h1:4TLZCmVJDM3FOu5P5TJP0zOlu9zWgDWU7aUxWbr+rcw=
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1 h1:csi9NLpFZXb9fxY7rS1xVzgPRGMt7MSNWeQ6eo247kE=
github.com/aws/aws-sdk-go-v2/service/s3 v1.97.1/go.mod h1:qXVal5H0ChqXP63t6jze5LmFalc7+ZE7wOdLtZ0LCP0=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 h1:0GFOLzEbOyZABS3PhYfBIx2rNBACYcKty+XGkTgw1ow=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.8/go.mod h1:LXypKvk85AROkKhOG6/YEcHFPoX+prKTowKnVdcaIxE=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 h1:kiIDLZ005EcKomYYITtfsjn7dtOwHDOFy7IbPXKek2o=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13/go.mod h1:2h/xGEowcW/g38g06g3KpRWDlT+OTfxxI0o1KqayAB8=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 h1:jzKAXIlhZhJbnYwHbvUQZEB8KfgAEuG0dc08Bkda7NU=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17/go.mod h1:Al9fFsXjv4KfbzQHGe6V4NZSZQXecFcvaIF4e70FoRA=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9 h1:Cng+OOwCHmFljXIxpEVXAGMnBia8MSU6Ch5i9PgBkcU=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.9/go.mod h1:LrlIndBDdjA/EeXeyNBle+gyCwTlizzW5ycgWnvIxkk=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 h1:QKZH0S178gCmFEgst8hN0mCX1KxLgHBKKY/CLqwP8lg=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.9/go.mod h1:7yuQJoT+OoH8aqIxw9vwF+8KpvLZ8AWmvmUWHsGQZvI=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 h1:lFd1+ZSEYJZYvv9d6kXzhkZu07si3f+GQ1AaYwa2LUM=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.15/go.mod h1:WSvS1NLr7JaPunCXqpJnWk1Bjo7IxzZXrZi1QQCkuqM=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 h1:dzztQ1YmfPrxdrOiuZRMF6fuOwWlWpD2StNLTceKpys=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19/go.mod h1:YO8TrYtFdl5w/4vmjL8zaBSsiNp3w0L1FfKVKenZT7w=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBUdErbMnAFFp12Lm/U=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw=
github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng=
github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
@@ -516,8 +516,8 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus=
github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc=
github.com/jaypipes/ghw v0.23.0 h1:WOL4hpLcIu1kIm+z5Oz19Tk1HNw/Sncrx/6GS8O0Kl0=
github.com/jaypipes/ghw v0.23.0/go.mod h1:fUNUjMZ0cjahKo+/u+32m9FutIx53Nkbi0Ti0m7j5HY=
github.com/jaypipes/ghw v0.24.0 h1:6RBrJzvHvZ0t+hSvqPmOd5b21C4fMsyiyFzWljEj8Wg=
github.com/jaypipes/ghw v0.24.0/go.mod h1:Qk3UjdH8Xu/OiVyb/eDJqnDsUc+awHU75y23ErZU33s=
github.com/jaypipes/pcidb v1.1.1 h1:QmPhpsbmmnCwZmHeYAATxEaoRuiMAJusKYkUncMC0ro=
github.com/jaypipes/pcidb v1.1.1/go.mod h1:x27LT2krrUgjf875KxQXKB0Ha/YXLdZRVmw6hH0G7g8=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
@@ -723,6 +723,8 @@ github.com/mudler/localrecall v0.5.9-0.20260321005011-810084e9369b h1:XeAnOEOOSK
github.com/mudler/localrecall v0.5.9-0.20260321005011-810084e9369b/go.mod h1:xuPtgL9zUyiQLmspYzO3kaboYrGbWmwi8BQPt1aCAcs=
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2 h1:+WHsL/j6EWOMUiMVIOJNKOwSKiQt/qDPc9fePCf87fA=
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2/go.mod h1:EA8Ashhd56o32qN7ouPKFSRUs/Z+LrRCF4v6R2Oarm8=
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8 h1:Ry8RiWy8fZ6Ff4E7dPmjRsBrnHOnPeOOj2LhCgyjQu0=
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8/go.mod h1:EA8Ashhd56o32qN7ouPKFSRUs/Z+LrRCF4v6R2Oarm8=
github.com/mudler/skillserver v0.0.6 h1:ixz6wUekLdTmbnpAavCkTydDF6UdXAG3ncYufSPK9G0=
github.com/mudler/skillserver v0.0.6/go.mod h1:z3yFhcL9bSykmmh6xgGu0hyoItd4CnxgtWMEWw8uFJU=
github.com/mudler/water v0.0.0-20250808092830-dd90dcf09025 h1:WFLP5FHInarYGXi6B/Ze204x7Xy6q/I4nCZnWEyPHK0=
@@ -882,8 +884,8 @@ github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxza
github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos=
github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM=
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI=
github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg=
github.com/quic-go/quic-go v0.54.1 h1:4ZAWm0AhCb6+hE+l5Q1NAL0iRn/ZrMwqHRGQiFwj2eg=
@@ -1097,22 +1099,22 @@ go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.41.0 h1:inYW9ZhgqiDqh6BioM7DVHHzEGVq76Db5897WLGZ5Go=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.41.0/go.mod h1:Izur+Wt8gClgMJqO/cZ8wdeeMryJ/xxiOVgFSSfpDTY=
go.opentelemetry.io/otel/exporters/prometheus v0.64.0 h1:g0LRDXMX/G1SEZtK8zl8Chm4K6GBwRkjPKE36LxiTYs=
go.opentelemetry.io/otel/exporters/prometheus v0.64.0/go.mod h1:UrgcjnarfdlBDP3GjDIJWe6HTprwSazNjwsI+Ru6hro=
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
go.opentelemetry.io/otel/exporters/prometheus v0.65.0 h1:jOveH/b4lU9HT7y+Gfamf18BqlOuz2PWEvs8yM7Q6XE=
go.opentelemetry.io/otel/exporters/prometheus v0.65.0/go.mod h1:i1P8pcumauPtUI4YNopea1dhzEMuEqWP1xoUZDylLHo=
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8N5CE=
go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0=
go.starlark.net v0.0.0-20250417143717-f57e51f710eb h1:zOg9DxxrorEmgGUr5UPdCEwKqiqG0MlZciuCuA3XiDE=
@@ -1342,8 +1344,8 @@ golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+Z
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
golang.zx2c4.com/wireguard/windows v0.5.3 h1:On6j2Rpn3OEMXqBq00QEDC7bWSZrPIHKIus8eIuExIE=
golang.zx2c4.com/wireguard/windows v0.5.3/go.mod h1:9TEe8TJmtwyQebdFwAkEWOPr3prrtqm+REGFifP60hI=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
@@ -1361,10 +1363,10 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls=
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto=
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww=
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516 h1:vmC/ws+pLzWjj/gzApyoZuSVrDtF1aod4u/+bbj8hgM=
google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:p3MLuOwURrGBRoEyFHBT3GjUwaCQVKeNqqWxlcISGdw=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 h1:sNrWoksmOyF5bvJUcnmbeAmQi8baNhqg5IWaI3llQqU=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
@@ -1373,8 +1375,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=

View File

@@ -19,6 +19,7 @@ const (
VendorNVIDIA = "nvidia"
VendorAMD = "amd"
VendorIntel = "intel"
VendorApple = "apple"
VendorVulkan = "vulkan"
VendorUnknown = "unknown"
)
@@ -29,7 +30,8 @@ const (
var UnifiedMemoryDevices = []string{
"NVIDIA GB10",
"GB10",
// Add more unified memory devices here as needed
"NVIDIA Thor",
"Thor",
}
// GPUMemoryInfo contains real-time GPU memory usage information
@@ -196,6 +198,12 @@ func DetectGPUVendor() (string, error) {
return VendorVulkan, nil
}
// Check for Apple Silicon (macOS)
if appleGPUs := getAppleGPUMemory(); len(appleGPUs) > 0 {
xlog.Debug("GPU vendor detected via system_profiler", "vendor", VendorApple)
return VendorApple, nil
}
// No vendor detected
return "", nil
}
@@ -258,6 +266,12 @@ func GetGPUMemoryUsage() []GPUMemoryInfo {
gpus = append(gpus, vulkanGPUs...)
}
// Try Apple Silicon (macOS only)
if len(gpus) == 0 {
appleGPUs := getAppleGPUMemory()
gpus = append(gpus, appleGPUs...)
}
return gpus
}
@@ -351,18 +365,44 @@ func getNVIDIAGPUMemory() []GPUMemoryInfo {
usagePercent = float64(usedBytes) / float64(totalBytes) * 100
}
} else if isNA {
// Unknown device with N/A values - skip memory info
xlog.Debug("nvidia-smi returned N/A for unknown device", "device", name)
gpus = append(gpus, GPUMemoryInfo{
Index: idx,
Name: name,
Vendor: VendorNVIDIA,
TotalVRAM: 0,
UsedVRAM: 0,
FreeVRAM: 0,
UsagePercent: 0,
})
continue
// Check if this is a Tegra/Jetson device — if so, it uses unified memory
if isTegraDevice() {
xlog.Debug("nvidia-smi returned N/A on Tegra device, using system RAM", "device", name)
sysInfo, err := GetSystemRAMInfo()
if err != nil {
xlog.Debug("failed to get system RAM for Tegra device", "error", err, "device", name)
gpus = append(gpus, GPUMemoryInfo{
Index: idx,
Name: name,
Vendor: VendorNVIDIA,
TotalVRAM: 0,
UsedVRAM: 0,
FreeVRAM: 0,
UsagePercent: 0,
})
continue
}
totalBytes = sysInfo.Total
usedBytes = sysInfo.Used
freeBytes = sysInfo.Free
if totalBytes > 0 {
usagePercent = float64(usedBytes) / float64(totalBytes) * 100
}
} else {
// Truly unknown device with N/A values - skip memory info
xlog.Debug("nvidia-smi returned N/A for unknown device", "device", name)
gpus = append(gpus, GPUMemoryInfo{
Index: idx,
Name: name,
Vendor: VendorNVIDIA,
TotalVRAM: 0,
UsedVRAM: 0,
FreeVRAM: 0,
UsagePercent: 0,
})
continue
}
} else {
// Normal GPU with dedicated VRAM
totalMB, _ := strconv.ParseFloat(totalStr, 64)
@@ -790,3 +830,84 @@ func getVulkanGPUMemory() []GPUMemoryInfo {
return gpus
}
// getAppleGPUMemory detects Apple Silicon GPUs using system_profiler (macOS only).
// Apple Silicon uses unified memory, so GPU memory is reported as system RAM.
func getAppleGPUMemory() []GPUMemoryInfo {
if _, err := exec.LookPath("system_profiler"); err != nil {
return nil
}
cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json")
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
xlog.Debug("system_profiler failed", "error", err, "stderr", stderr.String())
return nil
}
var result struct {
SPDisplaysDataType []struct {
Name string `json:"_name"`
Model string `json:"sppci_model"`
Cores string `json:"sppci_cores"`
DeviceType string `json:"sppci_device_type"`
Vendor string `json:"spdisplays_vendor"`
} `json:"SPDisplaysDataType"`
}
if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
xlog.Debug("failed to parse system_profiler output", "error", err)
return nil
}
var gpus []GPUMemoryInfo
for i, display := range result.SPDisplaysDataType {
if display.DeviceType != "spdisplays_gpu" {
continue
}
if !strings.Contains(strings.ToLower(display.Vendor), "apple") {
continue
}
name := display.Model
if name == "" {
name = display.Name
}
if name == "" {
name = "Apple GPU"
}
// Apple Silicon uses unified memory — report system RAM
ramInfo, err := GetSystemRAMInfo()
if err != nil {
xlog.Debug("Apple GPU detected but failed to get system RAM", "error", err)
gpus = append(gpus, GPUMemoryInfo{
Index: i,
Name: name,
Vendor: VendorApple,
})
continue
}
usagePercent := 0.0
if ramInfo.Total > 0 {
usagePercent = float64(ramInfo.Used) / float64(ramInfo.Total) * 100
}
xlog.Debug("Apple Silicon GPU detected (unified memory)", "device", name, "total_ram", ramInfo.Total)
gpus = append(gpus, GPUMemoryInfo{
Index: i,
Name: name,
Vendor: VendorApple,
TotalVRAM: ramInfo.Total,
UsedVRAM: ramInfo.Used,
FreeVRAM: ramInfo.Free,
UsagePercent: usagePercent,
})
}
return gpus
}

View File

@@ -383,5 +383,144 @@ var _ = Describe("Anthropic API E2E test", func() {
Expect(string(message.StopReason)).To(Equal("tool_use"))
})
})
Context("ChatDeltas (C++ autoparser)", func() {
It("streams tool calls via ChatDeltas", func() {
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
},
Tools: []anthropic.ToolUnionParam{
anthropic.ToolUnionParam{
OfTool: &anthropic.ToolParam{
Name: "get_weather",
Description: anthropic.Opt("Get the current weather in a given location"),
InputSchema: anthropic.ToolInputSchemaParam{
Type: constant.ValueOf[constant.Object](),
Properties: map[string]any{
"location": map[string]any{
"type": "string",
"description": "The city and state",
},
},
Required: []string{"location"},
},
},
},
},
})
message := anthropic.Message{}
hasToolUseStart := false
for stream.Next() {
event := stream.Current()
err := message.Accumulate(event)
Expect(err).ToNot(HaveOccurred())
if e, ok := event.AsAny().(anthropic.ContentBlockStartEvent); ok {
if e.ContentBlock.Type == "tool_use" {
hasToolUseStart = true
}
}
}
Expect(stream.Err()).ToNot(HaveOccurred())
Expect(hasToolUseStart).To(BeTrue(), "Should have tool_use content_block_start event from ChatDeltas")
Expect(string(message.StopReason)).To(Equal("tool_use"))
// Verify tool call is present in accumulated message
foundToolUse := false
for _, block := range message.Content {
if block.Type == "tool_use" {
foundToolUse = true
Expect(block.ID).ToNot(BeEmpty())
}
}
Expect(foundToolUse).To(BeTrue(), "Accumulated message should contain tool_use block from ChatDeltas")
})
It("streams content via ChatDeltas without duplication", func() {
stream := client.Messages.NewStreaming(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_CONTENT Tell me about LocalAI")),
},
})
message := anthropic.Message{}
var textDeltas []string
for stream.Next() {
event := stream.Current()
err := message.Accumulate(event)
Expect(err).ToNot(HaveOccurred())
if e, ok := event.AsAny().(anthropic.ContentBlockDeltaEvent); ok {
if e.Delta.Type == "text_delta" && e.Delta.Text != "" {
textDeltas = append(textDeltas, e.Delta.Text)
}
}
}
Expect(stream.Err()).ToNot(HaveOccurred())
Expect(message.Content).ToNot(BeEmpty())
Expect(string(message.StopReason)).To(Equal("end_turn"))
// Content should appear exactly once (no duplication)
fullText := ""
for _, block := range message.Content {
if block.Type == "text" {
fullText += block.Text
}
}
Expect(fullText).To(ContainSubstring("LocalAI"))
// Check that the content is not duplicated by counting occurrences
Expect(len(fullText)).To(BeNumerically("<", 200), "Content should not be duplicated")
})
It("handles tool calls via ChatDeltas in non-streaming mode", func() {
message, err := client.Messages.New(context.TODO(), anthropic.MessageNewParams{
Model: "mock-model-autoparser",
MaxTokens: 1024,
Messages: []anthropic.MessageParam{
anthropic.NewUserMessage(anthropic.NewTextBlock("AUTOPARSER_TOOL_CALL What's the weather like in San Francisco?")),
},
Tools: []anthropic.ToolUnionParam{
anthropic.ToolUnionParam{
OfTool: &anthropic.ToolParam{
Name: "get_weather",
Description: anthropic.Opt("Get the current weather"),
InputSchema: anthropic.ToolInputSchemaParam{
Type: constant.ValueOf[constant.Object](),
Properties: map[string]any{
"location": map[string]any{
"type": "string",
},
},
Required: []string{"location"},
},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
Expect(message.Content).ToNot(BeEmpty())
Expect(string(message.StopReason)).To(Equal("tool_use"))
foundToolUse := false
for _, block := range message.Content {
if block.Type == "tool_use" {
foundToolUse = true
Expect(block.ID).ToNot(BeEmpty())
}
}
Expect(foundToolUse).To(BeTrue(), "Should have tool_use block from ChatDeltas")
})
})
})
})