mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-18 05:33:09 -04:00
* feat: Rework VRAM estimation and use known_usecases in gallery Signed-off-by: Richard Palethorpe <io@richiejp.com> Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code] * chore(gallery): regenerate gallery index and add known_usecases to model entries Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
39 lines
1.5 KiB
Go
39 lines
1.5 KiB
Go
package localai
|
|
|
|
import (
|
|
"net/http"
|
|
|
|
"github.com/labstack/echo/v4"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
|
)
|
|
|
|
// VRAMEstimateEndpoint returns a handler that estimates VRAM usage for an
|
|
// installed model configuration at multiple context sizes.
|
|
// @Summary Estimate VRAM usage for a model
|
|
// @Description Estimates VRAM based on model weight files at multiple context sizes
|
|
// @Tags config
|
|
// @Accept json
|
|
// @Produce json
|
|
// @Param request body modeladmin.VRAMRequest true "VRAM estimation parameters"
|
|
// @Success 200 {object} modeladmin.VRAMResponse "VRAM estimate"
|
|
// @Router /api/models/vram-estimate [post]
|
|
func VRAMEstimateEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
|
return func(c echo.Context) error {
|
|
var req modeladmin.VRAMRequest
|
|
if err := c.Bind(&req); err != nil {
|
|
return c.JSON(http.StatusBadRequest, map[string]any{"error": "invalid request body"})
|
|
}
|
|
resp, err := modeladmin.EstimateVRAM(c.Request().Context(), req, cl, appConfig.SystemState)
|
|
if err != nil {
|
|
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
|
}
|
|
// Backwards compat: when there are no weight files, the previous
|
|
// handler returned {"message": "..."} rather than a typed response.
|
|
if resp.ContextNote == "no weight files found for estimation" && resp.EstimateResult.SizeBytes == 0 {
|
|
return c.JSON(http.StatusOK, map[string]any{"message": resp.ContextNote})
|
|
}
|
|
return c.JSON(http.StatusOK, resp)
|
|
}
|
|
}
|