Compare commits

..

2 Commits

Author SHA1 Message Date
Jesse Gross
0cab1b6193 server: use tiered VRAM-based default context length
Replace binary low VRAM mode with tiered VRAM thresholds that set
default context lengths for all models:

- < 24 GiB VRAM: 4,096 context
- 24-48 GiB VRAM: 32,768 context
- >= 48 GiB VRAM: 262,144 context
2026-01-28 14:19:31 -08:00
Jesse Gross
05359f383a server: fix ollama ps showing configured instead of actual context length
When context length is clamped to the model's trained context length,
ollama ps now shows the actual clamped value instead of the originally
configured value.
2026-01-27 16:32:08 -08:00
19 changed files with 211 additions and 153 deletions
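
Read together, the two commits change both how num_ctx is chosen and how it is reported. As a rough sketch of the tier selection from the first commit (it mirrors the switch added to routes.go below; the helper name and GiB constant here are illustrative, not part of the diff):

    // defaultContextForVRAM sketches the VRAM-tier mapping: total VRAM across
    // GPUs picks the default context length. Thresholds sit slightly below
    // 24/48 GiB (23/47 GiB) to tolerate small differences in reported memory.
    func defaultContextForVRAM(totalVRAM uint64) int {
        const gib = 1 << 30
        switch {
        case totalVRAM >= 47*gib:
            return 262144 // >= ~48 GiB
        case totalVRAM >= 23*gib:
            return 32768 // ~24-48 GiB
        default:
            return 4096 // < ~24 GiB
        }
    }

So a single 32 GiB GPU lands in the middle tier and defaults to 32,768 context, unless something more specific overrides it (see the precedence sketch after the routes.go diff).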

View File

@@ -15,15 +15,11 @@ type Claude struct{}
 
 func (c *Claude) String() string { return "Claude Code" }
 
-func (c *Claude) args(model string, extraArgs []string) []string {
-    var args []string
+func (c *Claude) args(model string) []string {
     if model != "" {
-        args = append(args, "--model", model)
+        return []string{"--model", model}
     }
-    if len(extraArgs) > 0 {
-        args = append(args, extraArgs...)
-    }
-    return args
+    return nil
 }
 
 func (c *Claude) findPath() (string, error) {
@@ -45,13 +41,13 @@ func (c *Claude) findPath() (string, error) {
     return fallback, nil
 }
 
-func (c *Claude) Run(model string, extraArgs []string) error {
+func (c *Claude) Run(model string) error {
     claudePath, err := c.findPath()
     if err != nil {
         return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
     }
 
-    cmd := exec.Command(claudePath, c.args(model, extraArgs)...)
+    cmd := exec.Command(claudePath, c.args(model)...)
     cmd.Stdin = os.Stdin
     cmd.Stdout = os.Stdout
     cmd.Stderr = os.Stderr

View File

@@ -82,23 +82,19 @@ func TestClaudeArgs(t *testing.T) {
     c := &Claude{}
 
     tests := []struct {
-        name      string
-        model     string
-        extraArgs []string
-        want      []string
+        name  string
+        model string
+        want  []string
     }{
-        {"with model", "llama3.2", nil, []string{"--model", "llama3.2"}},
-        {"empty model", "", nil, nil},
-        {"with model and extra args", "llama3.2", []string{"--yolo", "--hi"}, []string{"--model", "llama3.2", "--yolo", "--hi"}},
-        {"empty model with extra args", "", []string{"--help"}, []string{"--help"}},
-        {"multiple extra args", "llama3.2", []string{"--flag1", "--flag2", "value"}, []string{"--model", "llama3.2", "--flag1", "--flag2", "value"}},
+        {"with model", "llama3.2", []string{"--model", "llama3.2"}},
+        {"empty model", "", nil},
     }
 
     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
-            got := c.args(tt.model, tt.extraArgs)
+            got := c.args(tt.model)
             if !slices.Equal(got, tt.want) {
-                t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.extraArgs, got, tt.want)
+                t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
             }
         })
     }

View File

@@ -19,7 +19,7 @@ func (c *Clawdbot) String() string { return "Clawdbot" }
 
 const ansiGreen = "\033[32m"
 
-func (c *Clawdbot) Run(model string, extraArgs []string) error {
+func (c *Clawdbot) Run(model string) error {
     if _, err := exec.LookPath("clawdbot"); err != nil {
         return fmt.Errorf("clawdbot is not installed, install from https://docs.clawd.bot")
     }
@@ -32,13 +32,7 @@ func (c *Clawdbot) Run(model string, extraArgs []string) error {
         return fmt.Errorf("setup failed: %w", err)
     }
 
-    // Build args: "gateway" first, then any extra args
-    args := []string{"gateway"}
-    if len(extraArgs) > 0 {
-        args = append(args, extraArgs...)
-    }
-
-    cmd := exec.Command("clawdbot", args...)
+    cmd := exec.Command("clawdbot", "gateway")
     cmd.Stdin = os.Stdin
 
     // Capture output to detect "already running" message

View File

@@ -14,23 +14,20 @@ type Codex struct{}
 
 func (c *Codex) String() string { return "Codex" }
 
-func (c *Codex) args(model string, extraArgs []string) []string {
+func (c *Codex) args(model string) []string {
     args := []string{"--oss"}
     if model != "" {
         args = append(args, "-m", model)
     }
-    if len(extraArgs) > 0 {
-        args = append(args, extraArgs...)
-    }
     return args
 }
 
-func (c *Codex) Run(model string, extraArgs []string) error {
+func (c *Codex) Run(model string) error {
     if err := checkCodexVersion(); err != nil {
         return err
     }
 
-    cmd := exec.Command("codex", c.args(model, extraArgs)...)
+    cmd := exec.Command("codex", c.args(model)...)
     cmd.Stdin = os.Stdin
     cmd.Stdout = os.Stdout
     cmd.Stderr = os.Stderr

View File

@@ -9,22 +9,19 @@ func TestCodexArgs(t *testing.T) {
     c := &Codex{}
 
     tests := []struct {
-        name      string
-        model     string
-        extraArgs []string
-        want      []string
+        name  string
+        model string
+        want  []string
     }{
-        {"with model", "llama3.2", nil, []string{"--oss", "-m", "llama3.2"}},
-        {"empty model", "", nil, []string{"--oss"}},
-        {"with model and extra args", "qwen3-coder", []string{"--yolo"}, []string{"--oss", "-m", "qwen3-coder", "--yolo"}},
-        {"empty model with extra args", "", []string{"--help"}, []string{"--oss", "--help"}},
+        {"with model", "llama3.2", []string{"--oss", "-m", "llama3.2"}},
+        {"empty model", "", []string{"--oss"}},
     }
 
     for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
-            got := c.args(tt.model, tt.extraArgs)
+            got := c.args(tt.model)
             if !slices.Equal(got, tt.want) {
-                t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.extraArgs, got, tt.want)
+                t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
             }
         })
     }

View File

@@ -39,7 +39,7 @@ type modelEntry struct {
 
 func (d *Droid) String() string { return "Droid" }
 
-func (d *Droid) Run(model string, extraArgs []string) error {
+func (d *Droid) Run(model string) error {
     if _, err := exec.LookPath("droid"); err != nil {
         return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
     }
@@ -53,7 +53,7 @@ func (d *Droid) Run(model string, extraArgs []string) error {
         return fmt.Errorf("setup failed: %w", err)
     }
 
-    cmd := exec.Command("droid", extraArgs...)
+    cmd := exec.Command("droid")
     cmd.Stdin = os.Stdin
     cmd.Stdout = os.Stdout
     cmd.Stderr = os.Stderr

View File

@@ -22,7 +22,7 @@ import (
 
 // Runner can run an integration with a model.
 type Runner interface {
-    Run(model string, extraArgs []string) error
+    Run(model string) error
     // String returns the human-readable name of the integration
     String() string
 }
@@ -222,13 +222,13 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
     return selected, nil
 }
 
-func runIntegration(name, modelName string, extraArgs []string) error {
+func runIntegration(name, modelName string) error {
     r, ok := integrations[name]
     if !ok {
         return fmt.Errorf("unknown integration: %s", name)
     }
 
     fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", r, modelName)
-    return r.Run(modelName, extraArgs)
+    return r.Run(modelName)
 }
 
 // LaunchCmd returns the cobra command for launching integrations.
@@ -237,7 +237,7 @@ func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) erro
     var configFlag bool
 
     cmd := &cobra.Command{
-        Use:   "launch [INTEGRATION] [-- [EXTRA_ARGS...]]",
+        Use:   "launch [INTEGRATION]",
         Short: "Launch an integration with Ollama",
         Long: `Launch an integration configured with Ollama models.
@@ -252,17 +252,13 @@ Examples:
   ollama launch
   ollama launch claude
   ollama launch claude --model <model>
-  ollama launch droid --config (does not auto-launch)
-  ollama launch claude -- --yolo --hi (pass extra args to integration)`,
-        Args:    cobra.ArbitraryArgs,
+  ollama launch droid --config (does not auto-launch)`,
+        Args:    cobra.MaximumNArgs(1),
         PreRunE: checkServerHeartbeat,
         RunE: func(cmd *cobra.Command, args []string) error {
-            // Extract integration name and pass through remaining args
             var name string
-            var extraArgs []string
             if len(args) > 0 {
                 name = args[0]
-                extraArgs = args[1:]
             } else {
                 var err error
                 name, err = selectIntegration()
@@ -282,7 +278,7 @@ Examples:
             // If launching without --model, use saved config if available
             if !configFlag && modelFlag == "" {
                 if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
-                    return runIntegration(name, config.Models[0], extraArgs)
+                    return runIntegration(name, config.Models[0])
                 }
             }
@@ -343,13 +339,13 @@ Examples:
             if configFlag {
                 if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
-                    return runIntegration(name, models[0], extraArgs)
+                    return runIntegration(name, models[0])
                 }
                 fmt.Fprintf(os.Stderr, "Run 'ollama launch %s' to start with %s\n", strings.ToLower(name), models[0])
                 return nil
             }
 
-            return runIntegration(name, models[0], extraArgs)
+            return runIntegration(name, models[0])
         },
     }

View File

@@ -90,8 +90,8 @@ func TestLaunchCmd(t *testing.T) {
     cmd := LaunchCmd(mockCheck)
 
     t.Run("command structure", func(t *testing.T) {
-        if cmd.Use != "launch [INTEGRATION] [-- [EXTRA_ARGS...]]" {
-            t.Errorf("Use = %q, want %q", cmd.Use, "launch [INTEGRATION] [-- [EXTRA_ARGS...]]")
+        if cmd.Use != "launch [INTEGRATION]" {
+            t.Errorf("Use = %q, want %q", cmd.Use, "launch [INTEGRATION]")
         }
         if cmd.Short == "" {
             t.Error("Short description should not be empty")
@@ -121,7 +121,7 @@ func TestLaunchCmd(t *testing.T) {
 }
 
 func TestRunIntegration_UnknownIntegration(t *testing.T) {
-    err := runIntegration("unknown-integration", "model", nil)
+    err := runIntegration("unknown-integration", "model")
     if err == nil {
         t.Error("expected error for unknown integration, got nil")
     }
@@ -182,69 +182,7 @@ func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
             // Test Run() exists (we can't call it without actually running the command)
             // Just verify the method is available
-            var _ func(string, []string) error = r.Run
-        })
-    }
-}
-
-func TestParseExtraArgs(t *testing.T) {
-    tests := []struct {
-        name          string
-        args          []string
-        wantArgs      []string
-        wantExtraArgs []string
-    }{
-        {
-            name:          "no extra args",
-            args:          []string{"claude"},
-            wantArgs:      []string{"claude"},
-            wantExtraArgs: nil,
-        },
-        {
-            name:          "with extra args after --",
-            args:          []string{"claude", "--", "--yolo", "--hi"},
-            wantArgs:      []string{"claude"},
-            wantExtraArgs: []string{"--yolo", "--hi"},
-        },
-        {
-            name:          "extra args only after --",
-            args:          []string{"codex", "--", "--help"},
-            wantArgs:      []string{"codex"},
-            wantExtraArgs: []string{"--help"},
-        },
-        {
-            name:          "-- at end with no args after",
-            args:          []string{"claude", "--"},
-            wantArgs:      []string{"claude", "--"},
-            wantExtraArgs: nil,
-        },
-        {
-            name:          "multiple args after --",
-            args:          []string{"claude", "--", "--flag1", "--flag2", "value", "--flag3"},
-            wantArgs:      []string{"claude"},
-            wantExtraArgs: []string{"--flag1", "--flag2", "value", "--flag3"},
-        },
-    }
-
-    for _, tt := range tests {
-        t.Run(tt.name, func(t *testing.T) {
-            // Simulate the parsing logic from LaunchCmd
-            args := tt.args
-            var extraArgs []string
-            for i, arg := range args {
-                if arg == "--" && i < len(args)-1 {
-                    extraArgs = args[i+1:]
-                    args = args[:i]
-                    break
-                }
-            }
-            if !slices.Equal(args, tt.wantArgs) {
-                t.Errorf("args = %v, want %v", args, tt.wantArgs)
-            }
-            if !slices.Equal(extraArgs, tt.wantExtraArgs) {
-                t.Errorf("extraArgs = %v, want %v", extraArgs, tt.wantExtraArgs)
-            }
+            var _ func(string) error = r.Run
+        })
+    }
+}

View File

@@ -18,7 +18,7 @@ type OpenCode struct{}
 
 func (o *OpenCode) String() string { return "OpenCode" }
 
-func (o *OpenCode) Run(model string, extraArgs []string) error {
+func (o *OpenCode) Run(model string) error {
     if _, err := exec.LookPath("opencode"); err != nil {
         return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
     }
@@ -32,7 +32,7 @@ func (o *OpenCode) Run(model string, extraArgs []string) error {
         return fmt.Errorf("setup failed: %w", err)
     }
 
-    cmd := exec.Command("opencode", extraArgs...)
+    cmd := exec.Command("opencode")
     cmd.Stdin = os.Stdin
     cmd.Stdout = os.Stdout
     cmd.Stderr = os.Stderr

View File

@@ -201,7 +201,7 @@ var (
     // Enable the new Ollama engine
     NewEngine = Bool("OLLAMA_NEW_ENGINE")
     // ContextLength sets the default context length
-    ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
+    ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
     // Auth enables authentication between the Ollama client and server
     UseAuth = Bool("OLLAMA_AUTH")
     // Enable Vulkan backend
@@ -290,7 +290,7 @@ func AsMap() map[string]EnvVar {
     "OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
     "OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
     "OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-    "OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
+    "OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
     "OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
    "OLLAMA_REMOTES":         {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

View File

@@ -282,7 +282,7 @@ func TestVar(t *testing.T) {
 
 func TestContextLength(t *testing.T) {
     cases := map[string]uint{
-        "":     4096,
+        "":     0,
         "2048": 2048,
     }

View File

@@ -80,6 +80,7 @@ type LlamaServer interface {
     GetPort() int
     GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
     HasExited() bool
+    ContextLength() int
 }
 
 // llmServer is an instance of a runner hosting a single model
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
     return 0
 }
 
+func (s *llmServer) ContextLength() int {
+    return s.options.NumCtx
+}
+
 func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
     devices, err := ml.GetDevicesFromRunner(ctx, s)
     if err != nil {
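
ContextLength() exposes the NumCtx the runner was actually started with. Per the second commit message, the configured value can be clamped to the model's trained context length before the runner starts, so this accessor, rather than the originally configured options, is what ollama ps should report. A hedged sketch of that clamp (the clamping itself happens elsewhere in the scheduling path; the helper name here is illustrative):

    // clampNumCtx sketches why the actual value can differ from the
    // configured one: a request for more context than the model was trained
    // with is clamped, and s.options.NumCtx ends up holding the clamped value.
    func clampNumCtx(configured, trainedCtx int) int {
        if trainedCtx > 0 && configured > trainedCtx {
            return trainedCtx
        }
        return configured
    }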

View File

@@ -75,16 +75,12 @@ func experimentEnabled(name string) bool {
 
 var useClient2 = experimentEnabled("client2")
 
-// Low VRAM mode is based on the sum of total VRAM (not free) and triggers
-// reduced context length on some models
-var lowVRAMThreshold uint64 = 20 * format.GibiByte
-
 var mode string = gin.DebugMode
 
 type Server struct {
-    addr    net.Addr
-    sched   *Scheduler
-    lowVRAM bool
+    addr          net.Addr
+    sched         *Scheduler
+    defaultNumCtx int
 }
 
 func init() {
@@ -107,8 +103,12 @@ var (
     errBadTemplate = errors.New("template error")
 )
 
-func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
+func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
     opts := api.DefaultOptions()
+    if opts.NumCtx == 0 {
+        opts.NumCtx = s.defaultNumCtx
+    }
+
     if err := opts.FromMap(model.Options); err != nil {
         return api.Options{}, err
     }
@@ -140,20 +140,11 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
     return nil, nil, nil, fmt.Errorf("%s %w", name, err)
     }
 
-    opts, err := modelOptions(model, requestOpts)
+    opts, err := s.modelOptions(model, requestOpts)
     if err != nil {
         return nil, nil, nil, err
     }
 
-    // This model is much more capable with a larger context, so set that
-    // unless it would penalize performance too much
-    if !s.lowVRAM && slices.Contains([]string{
-        "gptoss", "gpt-oss",
-        "qwen3vl", "qwen3vlmoe",
-    }, model.Config.ModelFamily) {
-        opts.NumCtx = max(opts.NumCtx, 8192)
-    }
-
     runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
     var runner *runnerRef
     select {
@@ -1720,10 +1711,18 @@ func Serve(ln net.Listener) error {
     for _, gpu := range gpus {
         totalVRAM += gpu.TotalMemory - envconfig.GpuOverhead()
     }
-    if totalVRAM < lowVRAMThreshold {
-        s.lowVRAM = true
-        slog.Info("entering low vram mode", "total vram", format.HumanBytes2(totalVRAM), "threshold", format.HumanBytes2(lowVRAMThreshold))
+
+    // Set default context based on VRAM tier
+    // Use slightly lower thresholds (47/23 GiB vs. 48/24 GiB) to account for small differences in the exact value
+    switch {
+    case totalVRAM >= 47*format.GibiByte:
+        s.defaultNumCtx = 262144
+    case totalVRAM >= 23*format.GibiByte:
+        s.defaultNumCtx = 32768
+    default:
+        s.defaultNumCtx = 4096
     }
+    slog.Info("vram-based default context", "total_vram", format.HumanBytes2(totalVRAM), "default_num_ctx", s.defaultNumCtx)
 
     err = srvr.Serve(ln)
     // If server is closed from the signal handler, wait for the ctx to be done
@@ -1897,8 +1896,8 @@ func (s *Server) PsHandler(c *gin.Context) {
             Details:   modelDetails,
             ExpiresAt: v.expiresAt,
         }
-        if v.Options != nil {
-            mr.ContextLength = v.Options.NumCtx
+        if v.llama != nil {
+            mr.ContextLength = v.llama.ContextLength()
         }
 
         // The scheduler waits to set expiresAt, so if a model is loading it's
         // possible that it will be set to the unix epoch. For those cases, just
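
The net precedence for num_ctx, highest to lowest: request options, then model options, then OLLAMA_CONTEXT_LENGTH, then the VRAM-tier default (the new test file further below exercises every combination). Compressed into one illustrative function (names and the 0-means-unset convention mirror the diff, but this helper itself is not part of it):

    // effectiveNumCtx condenses the precedence implemented by modelOptions
    // above; 0 means "not set" at each level.
    func effectiveNumCtx(requestCtx, modelCtx, envCtx, tierDefault int) int {
        numCtx := envCtx // api.DefaultOptions() seeds NumCtx from OLLAMA_CONTEXT_LENGTH
        if numCtx == 0 {
            numCtx = tierDefault // 0 sentinel: fall back to the VRAM tier
        }
        if modelCtx != 0 {
            numCtx = modelCtx // opts.FromMap(model.Options) overrides
        }
        if requestCtx != 0 {
            numCtx = requestCtx // opts.FromMap(requestOpts) overrides last
        }
        return numCtx
    }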

View File

@@ -15,6 +15,7 @@ import (
 )
 
 func TestGenerateDebugRenderOnly(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{
@@ -208,6 +209,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
 }
 
 func TestChatDebugRenderOnly(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{

View File

@@ -20,6 +20,7 @@ import (
 
 // TestGenerateWithBuiltinRenderer tests that api/generate uses built-in renderers
 // when in chat-like flow (messages present, no suffix, no template)
 func TestGenerateWithBuiltinRenderer(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{
@@ -204,6 +205,7 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
 
 // TestGenerateWithDebugRenderOnly tests that debug_render_only works with built-in renderers
 func TestGenerateWithDebugRenderOnly(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{

View File

@@ -162,6 +162,7 @@ func TestGenerateChatRemote(t *testing.T) {
 }
 
 func TestGenerateChat(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{
@@ -878,6 +879,7 @@ func TestGenerateChat(t *testing.T) {
 }
 
 func TestGenerate(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     mock := mockRunner{
@@ -2355,6 +2357,7 @@ func TestGenerateWithImages(t *testing.T) {
 
 // TestImageGenerateStreamFalse tests that image generation respects stream=false
 // and returns a single JSON response instead of streaming ndjson.
 func TestImageGenerateStreamFalse(t *testing.T) {
+    t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
     gin.SetMode(gin.TestMode)
 
     p := t.TempDir()

View File

@@ -0,0 +1,127 @@
+package server
+
+import (
+    "testing"
+)
+
+func TestModelOptionsNumCtxPriority(t *testing.T) {
+    tests := []struct {
+        name           string
+        envContextLen  string // empty means not set (uses 0 sentinel)
+        defaultNumCtx  int    // VRAM-based default
+        modelNumCtx    int    // 0 means not set in model
+        requestNumCtx  int    // 0 means not set in request
+        expectedNumCtx int
+    }{
+        {
+            name:           "vram default when nothing else set",
+            envContextLen:  "",
+            defaultNumCtx:  32768,
+            modelNumCtx:    0,
+            requestNumCtx:  0,
+            expectedNumCtx: 32768,
+        },
+        {
+            name:           "env var overrides vram default",
+            envContextLen:  "8192",
+            defaultNumCtx:  32768,
+            modelNumCtx:    0,
+            requestNumCtx:  0,
+            expectedNumCtx: 8192,
+        },
+        {
+            name:           "model overrides vram default",
+            envContextLen:  "",
+            defaultNumCtx:  32768,
+            modelNumCtx:    16384,
+            requestNumCtx:  0,
+            expectedNumCtx: 16384,
+        },
+        {
+            name:           "model overrides env var",
+            envContextLen:  "8192",
+            defaultNumCtx:  32768,
+            modelNumCtx:    16384,
+            requestNumCtx:  0,
+            expectedNumCtx: 16384,
+        },
+        {
+            name:           "request overrides everything",
+            envContextLen:  "8192",
+            defaultNumCtx:  32768,
+            modelNumCtx:    16384,
+            requestNumCtx:  4096,
+            expectedNumCtx: 4096,
+        },
+        {
+            name:           "request overrides vram default",
+            envContextLen:  "",
+            defaultNumCtx:  32768,
+            modelNumCtx:    0,
+            requestNumCtx:  4096,
+            expectedNumCtx: 4096,
+        },
+        {
+            name:           "request overrides model",
+            envContextLen:  "",
+            defaultNumCtx:  32768,
+            modelNumCtx:    16384,
+            requestNumCtx:  4096,
+            expectedNumCtx: 4096,
+        },
+        {
+            name:           "low vram tier default",
+            envContextLen:  "",
+            defaultNumCtx:  4096,
+            modelNumCtx:    0,
+            requestNumCtx:  0,
+            expectedNumCtx: 4096,
+        },
+        {
+            name:           "high vram tier default",
+            envContextLen:  "",
+            defaultNumCtx:  262144,
+            modelNumCtx:    0,
+            requestNumCtx:  0,
+            expectedNumCtx: 262144,
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            // Set or clear environment variable
+            if tt.envContextLen != "" {
+                t.Setenv("OLLAMA_CONTEXT_LENGTH", tt.envContextLen)
+            }
+
+            // Create server with VRAM-based default
+            s := &Server{
+                defaultNumCtx: tt.defaultNumCtx,
+            }
+
+            // Create model options (use float64 as FromMap expects JSON-style numbers)
+            var modelOpts map[string]any
+            if tt.modelNumCtx != 0 {
+                modelOpts = map[string]any{"num_ctx": float64(tt.modelNumCtx)}
+            }
+            model := &Model{
+                Options: modelOpts,
+            }
+
+            // Create request options (use float64 as FromMap expects JSON-style numbers)
+            var requestOpts map[string]any
+            if tt.requestNumCtx != 0 {
+                requestOpts = map[string]any{"num_ctx": float64(tt.requestNumCtx)}
+            }
+
+            opts, err := s.modelOptions(model, requestOpts)
+            if err != nil {
+                t.Fatalf("modelOptions failed: %v", err)
+            }
+
+            if opts.NumCtx != tt.expectedNumCtx {
+                t.Errorf("NumCtx = %d, want %d", opts.NumCtx, tt.expectedNumCtx)
+            }
+        })
+    }
+}

View File

@@ -804,6 +804,7 @@ func (s *mockLlm) GetPort() int { return -
 func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
 func (s *mockLlm) HasExited() bool { return false }
 func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
+func (s *mockLlm) ContextLength() int { return 0 }
 
 // TestImageGenRunnerCanBeEvicted verifies that an image generation model
 // loaded in the scheduler can be evicted when idle.

View File

@@ -347,6 +347,11 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
     return s.vramSize
 }
 
+// Context length is not applicable for image generation.
+func (s *Server) ContextLength() int {
+    return 0
+}
+
 func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
     return nil, 0, errors.New("not supported")
 }