mirror of
https://github.com/ollama/ollama.git
synced 2026-01-29 09:43:35 -05:00
Compare commits
2 Commits
parth-laun
...
jessegross
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0cab1b6193 | ||
|
|
05359f383a |
@@ -15,15 +15,11 @@ type Claude struct{}
|
||||
|
||||
func (c *Claude) String() string { return "Claude Code" }
|
||||
|
||||
func (c *Claude) args(model string, extraArgs []string) []string {
|
||||
var args []string
|
||||
func (c *Claude) args(model string) []string {
|
||||
if model != "" {
|
||||
args = append(args, "--model", model)
|
||||
return []string{"--model", model}
|
||||
}
|
||||
if len(extraArgs) > 0 {
|
||||
args = append(args, extraArgs...)
|
||||
}
|
||||
return args
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Claude) findPath() (string, error) {
|
||||
@@ -45,13 +41,13 @@ func (c *Claude) findPath() (string, error) {
|
||||
return fallback, nil
|
||||
}
|
||||
|
||||
func (c *Claude) Run(model string, extraArgs []string) error {
|
||||
func (c *Claude) Run(model string) error {
|
||||
claudePath, err := c.findPath()
|
||||
if err != nil {
|
||||
return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
|
||||
}
|
||||
|
||||
cmd := exec.Command(claudePath, c.args(model, extraArgs)...)
|
||||
cmd := exec.Command(claudePath, c.args(model)...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
@@ -82,23 +82,19 @@ func TestClaudeArgs(t *testing.T) {
|
||||
c := &Claude{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
extraArgs []string
|
||||
want []string
|
||||
name string
|
||||
model string
|
||||
want []string
|
||||
}{
|
||||
{"with model", "llama3.2", nil, []string{"--model", "llama3.2"}},
|
||||
{"empty model", "", nil, nil},
|
||||
{"with model and extra args", "llama3.2", []string{"--yolo", "--hi"}, []string{"--model", "llama3.2", "--yolo", "--hi"}},
|
||||
{"empty model with extra args", "", []string{"--help"}, []string{"--help"}},
|
||||
{"multiple extra args", "llama3.2", []string{"--flag1", "--flag2", "value"}, []string{"--model", "llama3.2", "--flag1", "--flag2", "value"}},
|
||||
{"with model", "llama3.2", []string{"--model", "llama3.2"}},
|
||||
{"empty model", "", nil},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := c.args(tt.model, tt.extraArgs)
|
||||
got := c.args(tt.model)
|
||||
if !slices.Equal(got, tt.want) {
|
||||
t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.extraArgs, got, tt.want)
|
||||
t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ func (c *Clawdbot) String() string { return "Clawdbot" }
|
||||
|
||||
const ansiGreen = "\033[32m"
|
||||
|
||||
func (c *Clawdbot) Run(model string, extraArgs []string) error {
|
||||
func (c *Clawdbot) Run(model string) error {
|
||||
if _, err := exec.LookPath("clawdbot"); err != nil {
|
||||
return fmt.Errorf("clawdbot is not installed, install from https://docs.clawd.bot")
|
||||
}
|
||||
@@ -32,13 +32,7 @@ func (c *Clawdbot) Run(model string, extraArgs []string) error {
|
||||
return fmt.Errorf("setup failed: %w", err)
|
||||
}
|
||||
|
||||
// Build args: "gateway" first, then any extra args
|
||||
args := []string{"gateway"}
|
||||
if len(extraArgs) > 0 {
|
||||
args = append(args, extraArgs...)
|
||||
}
|
||||
|
||||
cmd := exec.Command("clawdbot", args...)
|
||||
cmd := exec.Command("clawdbot", "gateway")
|
||||
cmd.Stdin = os.Stdin
|
||||
|
||||
// Capture output to detect "already running" message
|
||||
|
||||
@@ -14,23 +14,20 @@ type Codex struct{}
|
||||
|
||||
func (c *Codex) String() string { return "Codex" }
|
||||
|
||||
func (c *Codex) args(model string, extraArgs []string) []string {
|
||||
func (c *Codex) args(model string) []string {
|
||||
args := []string{"--oss"}
|
||||
if model != "" {
|
||||
args = append(args, "-m", model)
|
||||
}
|
||||
if len(extraArgs) > 0 {
|
||||
args = append(args, extraArgs...)
|
||||
}
|
||||
return args
|
||||
}
|
||||
|
||||
func (c *Codex) Run(model string, extraArgs []string) error {
|
||||
func (c *Codex) Run(model string) error {
|
||||
if err := checkCodexVersion(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cmd := exec.Command("codex", c.args(model, extraArgs)...)
|
||||
cmd := exec.Command("codex", c.args(model)...)
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
@@ -9,22 +9,19 @@ func TestCodexArgs(t *testing.T) {
|
||||
c := &Codex{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
extraArgs []string
|
||||
want []string
|
||||
name string
|
||||
model string
|
||||
want []string
|
||||
}{
|
||||
{"with model", "llama3.2", nil, []string{"--oss", "-m", "llama3.2"}},
|
||||
{"empty model", "", nil, []string{"--oss"}},
|
||||
{"with model and extra args", "qwen3-coder", []string{"--yolo"}, []string{"--oss", "-m", "qwen3-coder", "--yolo"}},
|
||||
{"empty model with extra args", "", []string{"--help"}, []string{"--oss", "--help"}},
|
||||
{"with model", "llama3.2", []string{"--oss", "-m", "llama3.2"}},
|
||||
{"empty model", "", []string{"--oss"}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := c.args(tt.model, tt.extraArgs)
|
||||
got := c.args(tt.model)
|
||||
if !slices.Equal(got, tt.want) {
|
||||
t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.extraArgs, got, tt.want)
|
||||
t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ type modelEntry struct {
|
||||
|
||||
func (d *Droid) String() string { return "Droid" }
|
||||
|
||||
func (d *Droid) Run(model string, extraArgs []string) error {
|
||||
func (d *Droid) Run(model string) error {
|
||||
if _, err := exec.LookPath("droid"); err != nil {
|
||||
return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
|
||||
}
|
||||
@@ -53,7 +53,7 @@ func (d *Droid) Run(model string, extraArgs []string) error {
|
||||
return fmt.Errorf("setup failed: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command("droid", extraArgs...)
|
||||
cmd := exec.Command("droid")
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
@@ -22,7 +22,7 @@ import (
|
||||
// Runner can run an integration with a model.
|
||||
|
||||
type Runner interface {
|
||||
Run(model string, extraArgs []string) error
|
||||
Run(model string) error
|
||||
// String returns the human-readable name of the integration
|
||||
String() string
|
||||
}
|
||||
@@ -222,13 +222,13 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
|
||||
return selected, nil
|
||||
}
|
||||
|
||||
func runIntegration(name, modelName string, extraArgs []string) error {
|
||||
func runIntegration(name, modelName string) error {
|
||||
r, ok := integrations[name]
|
||||
if !ok {
|
||||
return fmt.Errorf("unknown integration: %s", name)
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", r, modelName)
|
||||
return r.Run(modelName, extraArgs)
|
||||
return r.Run(modelName)
|
||||
}
|
||||
|
||||
// LaunchCmd returns the cobra command for launching integrations.
|
||||
@@ -237,7 +237,7 @@ func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) erro
|
||||
var configFlag bool
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "launch [INTEGRATION] [-- [EXTRA_ARGS...]]",
|
||||
Use: "launch [INTEGRATION]",
|
||||
Short: "Launch an integration with Ollama",
|
||||
Long: `Launch an integration configured with Ollama models.
|
||||
|
||||
@@ -252,17 +252,13 @@ Examples:
|
||||
ollama launch
|
||||
ollama launch claude
|
||||
ollama launch claude --model <model>
|
||||
ollama launch droid --config (does not auto-launch)
|
||||
ollama launch claude -- --yolo --hi (pass extra args to integration)`,
|
||||
Args: cobra.ArbitraryArgs,
|
||||
ollama launch droid --config (does not auto-launch)`,
|
||||
Args: cobra.MaximumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Extract integration name and pass through remaining args
|
||||
var name string
|
||||
var extraArgs []string
|
||||
if len(args) > 0 {
|
||||
name = args[0]
|
||||
extraArgs = args[1:]
|
||||
} else {
|
||||
var err error
|
||||
name, err = selectIntegration()
|
||||
@@ -282,7 +278,7 @@ Examples:
|
||||
// If launching without --model, use saved config if available
|
||||
if !configFlag && modelFlag == "" {
|
||||
if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
|
||||
return runIntegration(name, config.Models[0], extraArgs)
|
||||
return runIntegration(name, config.Models[0])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -343,13 +339,13 @@ Examples:
|
||||
|
||||
if configFlag {
|
||||
if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
|
||||
return runIntegration(name, models[0], extraArgs)
|
||||
return runIntegration(name, models[0])
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Run 'ollama launch %s' to start with %s\n", strings.ToLower(name), models[0])
|
||||
return nil
|
||||
}
|
||||
|
||||
return runIntegration(name, models[0], extraArgs)
|
||||
return runIntegration(name, models[0])
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -90,8 +90,8 @@ func TestLaunchCmd(t *testing.T) {
|
||||
cmd := LaunchCmd(mockCheck)
|
||||
|
||||
t.Run("command structure", func(t *testing.T) {
|
||||
if cmd.Use != "launch [INTEGRATION] [-- [EXTRA_ARGS...]]" {
|
||||
t.Errorf("Use = %q, want %q", cmd.Use, "launch [INTEGRATION] [-- [EXTRA_ARGS...]]")
|
||||
if cmd.Use != "launch [INTEGRATION]" {
|
||||
t.Errorf("Use = %q, want %q", cmd.Use, "launch [INTEGRATION]")
|
||||
}
|
||||
if cmd.Short == "" {
|
||||
t.Error("Short description should not be empty")
|
||||
@@ -121,7 +121,7 @@ func TestLaunchCmd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRunIntegration_UnknownIntegration(t *testing.T) {
|
||||
err := runIntegration("unknown-integration", "model", nil)
|
||||
err := runIntegration("unknown-integration", "model")
|
||||
if err == nil {
|
||||
t.Error("expected error for unknown integration, got nil")
|
||||
}
|
||||
@@ -182,69 +182,7 @@ func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
|
||||
|
||||
// Test Run() exists (we can't call it without actually running the command)
|
||||
// Just verify the method is available
|
||||
var _ func(string, []string) error = r.Run
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseExtraArgs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
args []string
|
||||
wantArgs []string
|
||||
wantExtraArgs []string
|
||||
}{
|
||||
{
|
||||
name: "no extra args",
|
||||
args: []string{"claude"},
|
||||
wantArgs: []string{"claude"},
|
||||
wantExtraArgs: nil,
|
||||
},
|
||||
{
|
||||
name: "with extra args after --",
|
||||
args: []string{"claude", "--", "--yolo", "--hi"},
|
||||
wantArgs: []string{"claude"},
|
||||
wantExtraArgs: []string{"--yolo", "--hi"},
|
||||
},
|
||||
{
|
||||
name: "extra args only after --",
|
||||
args: []string{"codex", "--", "--help"},
|
||||
wantArgs: []string{"codex"},
|
||||
wantExtraArgs: []string{"--help"},
|
||||
},
|
||||
{
|
||||
name: "-- at end with no args after",
|
||||
args: []string{"claude", "--"},
|
||||
wantArgs: []string{"claude", "--"},
|
||||
wantExtraArgs: nil,
|
||||
},
|
||||
{
|
||||
name: "multiple args after --",
|
||||
args: []string{"claude", "--", "--flag1", "--flag2", "value", "--flag3"},
|
||||
wantArgs: []string{"claude"},
|
||||
wantExtraArgs: []string{"--flag1", "--flag2", "value", "--flag3"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Simulate the parsing logic from LaunchCmd
|
||||
args := tt.args
|
||||
var extraArgs []string
|
||||
for i, arg := range args {
|
||||
if arg == "--" && i < len(args)-1 {
|
||||
extraArgs = args[i+1:]
|
||||
args = args[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !slices.Equal(args, tt.wantArgs) {
|
||||
t.Errorf("args = %v, want %v", args, tt.wantArgs)
|
||||
}
|
||||
if !slices.Equal(extraArgs, tt.wantExtraArgs) {
|
||||
t.Errorf("extraArgs = %v, want %v", extraArgs, tt.wantExtraArgs)
|
||||
}
|
||||
var _ func(string) error = r.Run
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ type OpenCode struct{}
|
||||
|
||||
func (o *OpenCode) String() string { return "OpenCode" }
|
||||
|
||||
func (o *OpenCode) Run(model string, extraArgs []string) error {
|
||||
func (o *OpenCode) Run(model string) error {
|
||||
if _, err := exec.LookPath("opencode"); err != nil {
|
||||
return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
|
||||
}
|
||||
@@ -32,7 +32,7 @@ func (o *OpenCode) Run(model string, extraArgs []string) error {
|
||||
return fmt.Errorf("setup failed: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command("opencode", extraArgs...)
|
||||
cmd := exec.Command("opencode")
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
@@ -201,7 +201,7 @@ var (
|
||||
// Enable the new Ollama engine
|
||||
NewEngine = Bool("OLLAMA_NEW_ENGINE")
|
||||
// ContextLength sets the default context length
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
|
||||
ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
|
||||
// Auth enables authentication between the Ollama client and server
|
||||
UseAuth = Bool("OLLAMA_AUTH")
|
||||
// Enable Vulkan backend
|
||||
@@ -290,7 +290,7 @@ func AsMap() map[string]EnvVar {
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
|
||||
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
|
||||
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
|
||||
|
||||
|
||||
@@ -282,7 +282,7 @@ func TestVar(t *testing.T) {
|
||||
|
||||
func TestContextLength(t *testing.T) {
|
||||
cases := map[string]uint{
|
||||
"": 4096,
|
||||
"": 0,
|
||||
"2048": 2048,
|
||||
}
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ type LlamaServer interface {
|
||||
GetPort() int
|
||||
GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
|
||||
HasExited() bool
|
||||
ContextLength() int
|
||||
}
|
||||
|
||||
// llmServer is an instance of a runner hosting a single model
|
||||
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *llmServer) ContextLength() int {
|
||||
return s.options.NumCtx
|
||||
}
|
||||
|
||||
func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
|
||||
devices, err := ml.GetDevicesFromRunner(ctx, s)
|
||||
if err != nil {
|
||||
|
||||
@@ -75,16 +75,12 @@ func experimentEnabled(name string) bool {
|
||||
|
||||
var useClient2 = experimentEnabled("client2")
|
||||
|
||||
// Low VRAM mode is based on the sum of total VRAM (not free) and triggers
|
||||
// reduced context length on some models
|
||||
var lowVRAMThreshold uint64 = 20 * format.GibiByte
|
||||
|
||||
var mode string = gin.DebugMode
|
||||
|
||||
type Server struct {
|
||||
addr net.Addr
|
||||
sched *Scheduler
|
||||
lowVRAM bool
|
||||
addr net.Addr
|
||||
sched *Scheduler
|
||||
defaultNumCtx int
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -107,8 +103,12 @@ var (
|
||||
errBadTemplate = errors.New("template error")
|
||||
)
|
||||
|
||||
func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
|
||||
func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
|
||||
opts := api.DefaultOptions()
|
||||
if opts.NumCtx == 0 {
|
||||
opts.NumCtx = s.defaultNumCtx
|
||||
}
|
||||
|
||||
if err := opts.FromMap(model.Options); err != nil {
|
||||
return api.Options{}, err
|
||||
}
|
||||
@@ -140,20 +140,11 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
|
||||
return nil, nil, nil, fmt.Errorf("%s %w", name, err)
|
||||
}
|
||||
|
||||
opts, err := modelOptions(model, requestOpts)
|
||||
opts, err := s.modelOptions(model, requestOpts)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
// This model is much more capable with a larger context, so set that
|
||||
// unless it would penalize performance too much
|
||||
if !s.lowVRAM && slices.Contains([]string{
|
||||
"gptoss", "gpt-oss",
|
||||
"qwen3vl", "qwen3vlmoe",
|
||||
}, model.Config.ModelFamily) {
|
||||
opts.NumCtx = max(opts.NumCtx, 8192)
|
||||
}
|
||||
|
||||
runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
|
||||
var runner *runnerRef
|
||||
select {
|
||||
@@ -1720,10 +1711,18 @@ func Serve(ln net.Listener) error {
|
||||
for _, gpu := range gpus {
|
||||
totalVRAM += gpu.TotalMemory - envconfig.GpuOverhead()
|
||||
}
|
||||
if totalVRAM < lowVRAMThreshold {
|
||||
s.lowVRAM = true
|
||||
slog.Info("entering low vram mode", "total vram", format.HumanBytes2(totalVRAM), "threshold", format.HumanBytes2(lowVRAMThreshold))
|
||||
|
||||
// Set default context based on VRAM tier
|
||||
// Use slightly lower thresholds (47/23 GiB vs. 48/24 GiB) to account for small differences in the exact value
|
||||
switch {
|
||||
case totalVRAM >= 47*format.GibiByte:
|
||||
s.defaultNumCtx = 262144
|
||||
case totalVRAM >= 23*format.GibiByte:
|
||||
s.defaultNumCtx = 32768
|
||||
default:
|
||||
s.defaultNumCtx = 4096
|
||||
}
|
||||
slog.Info("vram-based default context", "total_vram", format.HumanBytes2(totalVRAM), "default_num_ctx", s.defaultNumCtx)
|
||||
|
||||
err = srvr.Serve(ln)
|
||||
// If server is closed from the signal handler, wait for the ctx to be done
|
||||
@@ -1897,8 +1896,8 @@ func (s *Server) PsHandler(c *gin.Context) {
|
||||
Details: modelDetails,
|
||||
ExpiresAt: v.expiresAt,
|
||||
}
|
||||
if v.Options != nil {
|
||||
mr.ContextLength = v.Options.NumCtx
|
||||
if v.llama != nil {
|
||||
mr.ContextLength = v.llama.ContextLength()
|
||||
}
|
||||
// The scheduler waits to set expiresAt, so if a model is loading it's
|
||||
// possible that it will be set to the unix epoch. For those cases, just
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
)
|
||||
|
||||
func TestGenerateDebugRenderOnly(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
@@ -208,6 +209,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestChatDebugRenderOnly(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
// TestGenerateWithBuiltinRenderer tests that api/generate uses built-in renderers
|
||||
// when in chat-like flow (messages present, no suffix, no template)
|
||||
func TestGenerateWithBuiltinRenderer(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
@@ -204,6 +205,7 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
|
||||
|
||||
// TestGenerateWithDebugRenderOnly tests that debug_render_only works with built-in renderers
|
||||
func TestGenerateWithDebugRenderOnly(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
|
||||
@@ -162,6 +162,7 @@ func TestGenerateChatRemote(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGenerateChat(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
@@ -878,6 +879,7 @@ func TestGenerateChat(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestGenerate(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
mock := mockRunner{
|
||||
@@ -2355,6 +2357,7 @@ func TestGenerateWithImages(t *testing.T) {
|
||||
// TestImageGenerateStreamFalse tests that image generation respects stream=false
|
||||
// and returns a single JSON response instead of streaming ndjson.
|
||||
func TestImageGenerateStreamFalse(t *testing.T) {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
p := t.TempDir()
|
||||
|
||||
127
server/routes_options_test.go
Normal file
127
server/routes_options_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestModelOptionsNumCtxPriority(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
envContextLen string // empty means not set (uses 0 sentinel)
|
||||
defaultNumCtx int // VRAM-based default
|
||||
modelNumCtx int // 0 means not set in model
|
||||
requestNumCtx int // 0 means not set in request
|
||||
expectedNumCtx int
|
||||
}{
|
||||
{
|
||||
name: "vram default when nothing else set",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 0,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 32768,
|
||||
},
|
||||
{
|
||||
name: "env var overrides vram default",
|
||||
envContextLen: "8192",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 0,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 8192,
|
||||
},
|
||||
{
|
||||
name: "model overrides vram default",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 16384,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 16384,
|
||||
},
|
||||
{
|
||||
name: "model overrides env var",
|
||||
envContextLen: "8192",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 16384,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 16384,
|
||||
},
|
||||
{
|
||||
name: "request overrides everything",
|
||||
envContextLen: "8192",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 16384,
|
||||
requestNumCtx: 4096,
|
||||
expectedNumCtx: 4096,
|
||||
},
|
||||
{
|
||||
name: "request overrides vram default",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 0,
|
||||
requestNumCtx: 4096,
|
||||
expectedNumCtx: 4096,
|
||||
},
|
||||
{
|
||||
name: "request overrides model",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 32768,
|
||||
modelNumCtx: 16384,
|
||||
requestNumCtx: 4096,
|
||||
expectedNumCtx: 4096,
|
||||
},
|
||||
{
|
||||
name: "low vram tier default",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 4096,
|
||||
modelNumCtx: 0,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 4096,
|
||||
},
|
||||
{
|
||||
name: "high vram tier default",
|
||||
envContextLen: "",
|
||||
defaultNumCtx: 262144,
|
||||
modelNumCtx: 0,
|
||||
requestNumCtx: 0,
|
||||
expectedNumCtx: 262144,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Set or clear environment variable
|
||||
if tt.envContextLen != "" {
|
||||
t.Setenv("OLLAMA_CONTEXT_LENGTH", tt.envContextLen)
|
||||
}
|
||||
|
||||
// Create server with VRAM-based default
|
||||
s := &Server{
|
||||
defaultNumCtx: tt.defaultNumCtx,
|
||||
}
|
||||
|
||||
// Create model options (use float64 as FromMap expects JSON-style numbers)
|
||||
var modelOpts map[string]any
|
||||
if tt.modelNumCtx != 0 {
|
||||
modelOpts = map[string]any{"num_ctx": float64(tt.modelNumCtx)}
|
||||
}
|
||||
model := &Model{
|
||||
Options: modelOpts,
|
||||
}
|
||||
|
||||
// Create request options (use float64 as FromMap expects JSON-style numbers)
|
||||
var requestOpts map[string]any
|
||||
if tt.requestNumCtx != 0 {
|
||||
requestOpts = map[string]any{"num_ctx": float64(tt.requestNumCtx)}
|
||||
}
|
||||
|
||||
opts, err := s.modelOptions(model, requestOpts)
|
||||
if err != nil {
|
||||
t.Fatalf("modelOptions failed: %v", err)
|
||||
}
|
||||
|
||||
if opts.NumCtx != tt.expectedNumCtx {
|
||||
t.Errorf("NumCtx = %d, want %d", opts.NumCtx, tt.expectedNumCtx)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -804,6 +804,7 @@ func (s *mockLlm) GetPort() int { return -
|
||||
func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
|
||||
func (s *mockLlm) HasExited() bool { return false }
|
||||
func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
|
||||
func (s *mockLlm) ContextLength() int { return 0 }
|
||||
|
||||
// TestImageGenRunnerCanBeEvicted verifies that an image generation model
|
||||
// loaded in the scheduler can be evicted when idle.
|
||||
|
||||
@@ -347,6 +347,11 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
|
||||
return s.vramSize
|
||||
}
|
||||
|
||||
// Context length is not applicable for image generation.
|
||||
func (s *Server) ContextLength() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
|
||||
return nil, 0, errors.New("not supported")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user