Compare commits

1 Commit

jmorganca
846f3fbcc8 app: expose server's default context length to UI
Parse the default_num_ctx value from the server's "vram-based default context"
log line and expose it through the inference compute API. This eliminates
duplicated VRAM tier calculation logic in the frontend.

- Add InferenceInfo struct with Computes and DefaultContextLength
- Rename GetInferenceComputer to GetInferenceInfo
- Handle missing default context line gracefully (older servers)
- Add DefaultContextLength to InferenceComputeResponse
- Update Settings UI to use server's default, disable slider while loading
- Add disabled prop to Slider component (grays out + hides handle)
- Migrate existing users with context_length=4096 to 0 (auto mode)
2026-02-02 16:25:29 -08:00
14 changed files with 187 additions and 463 deletions
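For orientation, here is a minimal standalone sketch of the parsing approach this commit introduces. The sample log line is copied from the test fixtures below, and the two regular expressions are the ones added in the server diff.

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

func main() {
	line := `time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144`

	// Two-step match, as in GetInferenceInfo below: confirm the marker line
	// first, then capture the numeric value.
	marker := regexp.MustCompile(`vram-based default context`)
	ctxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)

	if marker.MatchString(line) {
		if m := ctxRegex.FindStringSubmatch(line); len(m) > 1 {
			if n, err := strconv.Atoi(m[1]); err == nil {
				fmt.Println("default context length:", n) // prints 262144
			}
		}
	}
}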

View File

@@ -41,6 +41,11 @@ type InferenceCompute struct {
VRAM string
}
type InferenceInfo struct {
Computes []InferenceCompute
DefaultContextLength int
}
func New(s *store.Store, devMode bool) *Server {
p := resolvePath("ollama")
return &Server{store: s, bin: p, dev: devMode}
@@ -262,9 +267,12 @@ func openRotatingLog() (io.WriteCloser, error) {
// Attempt to retrieve inference compute information from the server
// log. Set a timeout on ctx to control how long to wait for the log lines to appear.
func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
inference := []InferenceCompute{}
marker := regexp.MustCompile(`inference compute.*library=`)
func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
info := &InferenceInfo{}
computeMarker := regexp.MustCompile(`inference compute.*library=`)
defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)
q := `inference compute.*%s=["]([^"]*)["]`
nq := `inference compute.*%s=(\S+)\s`
type regex struct {
@@ -330,8 +338,8 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
match := marker.FindStringSubmatch(line)
if len(match) > 0 {
// Check for inference compute lines
if computeMarker.MatchString(line) {
ic := InferenceCompute{
Library: get("library", line),
Variant: get("variant", line),
@@ -342,12 +350,25 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
}
slog.Info("Matched", "inference compute", ic)
inference = append(inference, ic)
} else {
// Break out on the first non-matching line after we start matching
if len(inference) > 0 {
return inference, nil
info.Computes = append(info.Computes, ic)
continue
}
// Check for default context length line
if defaultCtxMarker.MatchString(line) {
match := defaultCtxRegex.FindStringSubmatch(line)
if len(match) > 1 {
numCtx, err := strconv.Atoi(match[1])
if err == nil {
info.DefaultContextLength = numCtx
slog.Info("Matched default context length", "default_num_ctx", numCtx)
}
}
return info, nil
}
// If we've found compute info but hit a non-matching line, return what we have
// This handles older server versions that don't log the default context line
if len(info.Computes) > 0 {
return info, nil
}
}
time.Sleep(100 * time.Millisecond)
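For context, a sketch of the intended call pattern (a hypothetical helper in the same package, assuming context and time are imported). GetInferenceInfo polls the server log every 100ms until the compute lines appear or ctx expires, and DefaultContextLength stays 0 for older servers that never log the default-context line; the HTTP handler later in this diff calls it with a 500ms timeout.

func defaultContextOrZero() int {
	// Bound the wait; without a deadline this would poll indefinitely.
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	info, err := GetInferenceInfo(ctx)
	if err != nil {
		return 0 // log lines never appeared; treat the default as unknown
	}
	// Still 0 here means an older server without the "vram-based default
	// context" log line.
	return info.DefaultContextLength
}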

View File

@@ -136,44 +136,50 @@ func TestServerCmd(t *testing.T) {
}
}
func TestGetInferenceComputer(t *testing.T) {
func TestGetInferenceInfo(t *testing.T) {
tests := []struct {
name string
log string
exp []InferenceCompute
name string
log string
expComputes []InferenceCompute
expDefaultCtxLen int
}{
{
name: "metal",
log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144
time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
`,
exp: []InferenceCompute{{
expComputes: []InferenceCompute{{
Library: "metal",
Driver: "0.0",
VRAM: "96.0 GiB",
}},
expDefaultCtxLen: 262144,
},
{
name: "cpu",
log: `time=2025-07-01T17:59:51.470Z level=INFO source=gpu.go:377 msg="no compatible GPUs were discovered"
time=2025-07-01T17:59:51.470Z level=INFO source=types.go:130 msg="inference compute" id=0 library=cpu variant="" compute="" driver=0.0 name="" total="31.3 GiB" available="30.4 GiB"
time=2025-07-01T17:59:51.471Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="31.3 GiB" default_num_ctx=32768
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
exp: []InferenceCompute{{
expComputes: []InferenceCompute{{
Library: "cpu",
Driver: "0.0",
VRAM: "31.3 GiB",
}},
expDefaultCtxLen: 32768,
},
{
name: "cuda1",
log: `time=2025-07-01T19:33:43.162Z level=DEBUG source=amd_linux.go:419 msg="amdgpu driver not detected /sys/module/amdgpu"
releasing cuda driver library
time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference compute" id=GPU-452cac9f-6960-839c-4fb3-0cec83699196 library=cuda variant=v12 compute=6.1 driver=12.7 name="NVIDIA GeForce GT 1030" total="3.9 GiB" available="3.9 GiB"
time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="3.9 GiB" default_num_ctx=4096
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
exp: []InferenceCompute{{
expComputes: []InferenceCompute{{
Library: "cuda",
Variant: "v12",
Compute: "6.1",
@@ -181,6 +187,7 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
Name: "NVIDIA GeForce GT 1030",
VRAM: "3.9 GiB",
}},
expDefaultCtxLen: 4096,
},
{
name: "frank",
@@ -188,9 +195,10 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
releasing cuda driver library
time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-d6de3398-9932-6902-11ec-fee8e424c8a2 library=cuda variant=v12 compute=7.5 driver=12.8 name="NVIDIA GeForce RTX 2080 Ti" total="10.6 GiB" available="10.4 GiB"
time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-9abb57639fa80c50 library=rocm variant="" compute=gfx1030 driver=6.3 name=1002:73bf total="16.0 GiB" available="1.3 GiB"
time=2025-07-01T19:36:13.316Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="26.6 GiB" default_num_ctx=32768
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
exp: []InferenceCompute{
expComputes: []InferenceCompute{
{
Library: "cuda",
Variant: "v12",
@@ -207,6 +215,20 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
VRAM: "16.0 GiB",
},
},
expDefaultCtxLen: 32768,
},
{
name: "missing_default_context",
log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
`,
expComputes: []InferenceCompute{{
Library: "metal",
Driver: "0.0",
VRAM: "96.0 GiB",
}},
expDefaultCtxLen: 0, // No default context line, should return 0
},
}
for _, tt := range tests {
@@ -219,18 +241,21 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
}
ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
defer cancel()
ics, err := GetInferenceComputer(ctx)
info, err := GetInferenceInfo(ctx)
if err != nil {
t.Fatalf(" failed to get inference compute: %v", err)
t.Fatalf("failed to get inference info: %v", err)
}
if !reflect.DeepEqual(ics, tt.exp) {
t.Fatalf("got:\n%#v\nwant:\n%#v", ics, tt.exp)
if !reflect.DeepEqual(info.Computes, tt.expComputes) {
t.Fatalf("computes mismatch\ngot:\n%#v\nwant:\n%#v", info.Computes, tt.expComputes)
}
if info.DefaultContextLength != tt.expDefaultCtxLen {
t.Fatalf("default context length mismatch: got %d, want %d", info.DefaultContextLength, tt.expDefaultCtxLen)
}
})
}
}
func TestGetInferenceComputerTimeout(t *testing.T) {
func TestGetInferenceInfoTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
defer cancel()
tmpDir := t.TempDir()
@@ -239,7 +264,7 @@ func TestGetInferenceComputerTimeout(t *testing.T) {
if err != nil {
t.Fatalf("failed to write log file %s: %s", serverLogPath, err)
}
_, err = GetInferenceComputer(ctx)
_, err = GetInferenceInfo(ctx)
if err == nil {
t.Fatal("expected timeout")
}

View File

@@ -14,7 +14,7 @@ import (
// currentSchemaVersion defines the current database schema version.
// Increment this when making schema changes that require migrations.
const currentSchemaVersion = 12
const currentSchemaVersion = 13
// database wraps the SQLite connection.
// SQLite handles its own locking for concurrent access:
@@ -73,7 +73,7 @@ func (db *database) init() error {
agent BOOLEAN NOT NULL DEFAULT 0,
tools BOOLEAN NOT NULL DEFAULT 0,
working_dir TEXT NOT NULL DEFAULT '',
context_length INTEGER NOT NULL DEFAULT 4096,
context_length INTEGER NOT NULL DEFAULT 0,
window_width INTEGER NOT NULL DEFAULT 0,
window_height INTEGER NOT NULL DEFAULT 0,
config_migrated BOOLEAN NOT NULL DEFAULT 0,
@@ -244,6 +244,12 @@ func (db *database) migrate() error {
return fmt.Errorf("migrate v11 to v12: %w", err)
}
version = 12
case 12:
// change default context_length from 4096 to 0 (VRAM-based tiered defaults)
if err := db.migrateV12ToV13(); err != nil {
return fmt.Errorf("migrate v12 to v13: %w", err)
}
version = 13
default:
// If we have a version we don't recognize, just set it to current
// This might happen during development
@@ -452,6 +458,23 @@ func (db *database) migrateV11ToV12() error {
return nil
}
// migrateV12ToV13 changes the default context_length from 4096 to 0
// When context_length is 0, the ollama server uses VRAM-based tiered defaults
func (db *database) migrateV12ToV13() error {
// Update users who have the old default of 4096 to the new default of 0
_, err := db.conn.Exec(`UPDATE settings SET context_length = 0 WHERE context_length = 4096`)
if err != nil {
return fmt.Errorf("update context_length default: %w", err)
}
_, err = db.conn.Exec(`UPDATE settings SET schema_version = 13`)
if err != nil {
return fmt.Errorf("update schema version: %w", err)
}
return nil
}
// cleanupOrphanedData removes orphaned records that may exist due to the foreign key bug
func (db *database) cleanupOrphanedData() error {
_, err := db.conn.Exec(`
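In effect the migration applies this rule, restated here as a hypothetical pure-Go helper for illustration (the actual change is the single UPDATE above):

// migrateContextLength mirrors the v12-to-v13 UPDATE: reset the old
// hard-coded default, keep any other explicit value.
func migrateContextLength(old int) int {
	if old == 4096 { // the old schema default (a deliberately chosen 4096 is reset too)
		return 0 // 0 = auto: the server picks a VRAM-based default
	}
	return old // any other explicit value, e.g. 8192, survives
}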

View File

@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS settings (
agent BOOLEAN NOT NULL DEFAULT 0,
tools BOOLEAN NOT NULL DEFAULT 0,
working_dir TEXT NOT NULL DEFAULT '',
context_length INTEGER NOT NULL DEFAULT 4096,
context_length INTEGER NOT NULL DEFAULT 0,
window_width INTEGER NOT NULL DEFAULT 0,
window_height INTEGER NOT NULL DEFAULT 0,
config_migrated BOOLEAN NOT NULL DEFAULT 0,

View File

@@ -289,10 +289,12 @@ export class InferenceCompute {
}
export class InferenceComputeResponse {
inferenceComputes: InferenceCompute[];
defaultContextLength: number;
constructor(source: any = {}) {
if ('string' === typeof source) source = JSON.parse(source);
this.inferenceComputes = this.convertValues(source["inferenceComputes"], InferenceCompute);
this.defaultContextLength = source["defaultContextLength"];
}
convertValues(a: any, classs: any, asMap: boolean = false): any {

View File

@@ -4,7 +4,6 @@ import {
ChatEvent,
DownloadEvent,
ErrorEvent,
InferenceCompute,
InferenceComputeResponse,
ModelCapabilitiesResponse,
Model,
@@ -379,7 +378,7 @@ export async function* pullModel(
}
}
export async function getInferenceCompute(): Promise<InferenceCompute[]> {
export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
const response = await fetch(`${API_BASE}/api/v1/inference-compute`);
if (!response.ok) {
throw new Error(
@@ -388,8 +387,7 @@ export async function getInferenceCompute(): Promise<InferenceCompute[]> {
}
const data = await response.json();
const inferenceComputeResponse = new InferenceComputeResponse(data);
return inferenceComputeResponse.inferenceComputes || [];
return new InferenceComputeResponse(data);
}
export async function fetchHealth(): Promise<boolean> {

View File

@@ -19,7 +19,7 @@ import { Settings as SettingsType } from "@/gotypes";
import { useNavigate } from "@tanstack/react-router";
import { useUser } from "@/hooks/useUser";
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
import { getSettings, updateSettings } from "@/api";
import { getSettings, updateSettings, getInferenceCompute } from "@/api";
function AnimatedDots() {
return (
@@ -65,6 +65,13 @@ export default function Settings() {
const settings = settingsData?.settings || null;
const { data: inferenceComputeResponse } = useQuery({
queryKey: ["inferenceCompute"],
queryFn: getInferenceCompute,
});
const defaultContextLength = inferenceComputeResponse?.defaultContextLength;
const updateSettingsMutation = useMutation({
mutationFn: updateSettings,
onSuccess: () => {
@@ -148,7 +155,7 @@ export default function Settings() {
Models: "",
Agent: false,
Tools: false,
ContextLength: 4096,
ContextLength: 0,
AirplaneMode: false,
});
updateSettingsMutation.mutate(defaultSettings);
@@ -419,13 +426,11 @@ export default function Settings() {
</Description>
<div className="mt-3">
<Slider
value={(() => {
// Otherwise use the settings value
return settings.ContextLength || 4096;
})()}
value={settings.ContextLength || defaultContextLength || 0}
onChange={(value) => {
handleChange("ContextLength", value);
}}
disabled={!defaultContextLength}
options={[
{ value: 4096, label: "4k" },
{ value: 8192, label: "8k" },
@@ -440,6 +445,7 @@ export default function Settings() {
</div>
</div>
</Field>
{/* Airplane Mode */}
<Field>
<div className="flex items-start justify-between gap-4">

View File

@@ -6,10 +6,11 @@ export interface SliderProps {
value?: number;
onChange?: (value: number) => void;
className?: string;
disabled?: boolean;
}
const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
({ label, options, value = 0, onChange }, ref) => {
({ label, options, value = 0, onChange, disabled = false }, ref) => {
const [selectedValue, setSelectedValue] = React.useState(value);
const [isDragging, setIsDragging] = React.useState(false);
const containerRef = React.useRef<HTMLDivElement>(null);
@@ -20,6 +21,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
}, [value]);
const handleClick = (optionValue: number) => {
if (disabled) return;
setSelectedValue(optionValue);
onChange?.(optionValue);
};
@@ -39,6 +41,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
};
const handleMouseDown = (e: React.MouseEvent) => {
if (disabled) return;
setIsDragging(true);
e.preventDefault();
};
@@ -77,7 +80,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
}
return (
<div className="space-y-2" ref={ref}>
<div className={`space-y-2 ${disabled ? "opacity-50" : ""}`} ref={ref}>
{label && <label className="text-sm font-medium">{label}</label>}
<div className="relative">
<div className="absolute top-[9px] left-2 right-2 h-1 bg-neutral-200 dark:bg-neutral-700 pointer-events-none rounded-full" />
@@ -88,10 +91,11 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
<button
onClick={() => handleClick(option.value)}
onMouseDown={handleMouseDown}
className="relative px-3 py-6 -mx-3 -my-6 z-10 cursor-pointer"
disabled={disabled}
className={`relative px-3 py-6 -mx-3 -my-6 z-10 ${disabled ? "cursor-not-allowed" : "cursor-pointer"}`}
>
<div className="relative w-5 h-5 flex items-center justify-center">
{selectedValue === option.value && (
{selectedValue === option.value && !disabled && (
<div className="w-4 h-4 bg-white dark:bg-white border border-neutral-400 dark:border-neutral-500 rounded-full cursor-grab active:cursor-grabbing" />
)}
</div>

View File

@@ -26,12 +26,14 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
currentChatId && currentChatId !== "new" ? currentChatId : "",
);
const { data: inferenceComputes = [] } = useQuery({
queryKey: ["inference-compute"],
const { data: inferenceComputeResponse } = useQuery({
queryKey: ["inferenceCompute"],
queryFn: getInferenceCompute,
enabled: !settings.selectedModel, // Only fetch if no model is selected
});
const inferenceComputes = inferenceComputeResponse?.inferenceComputes || [];
const totalVRAM = useMemo(
() => getTotalVRAM(inferenceComputes),
[inferenceComputes],

View File

@@ -45,7 +45,8 @@ type InferenceCompute struct {
}
type InferenceComputeResponse struct {
InferenceComputes []InferenceCompute `json:"inferenceComputes"`
InferenceComputes []InferenceCompute `json:"inferenceComputes"`
DefaultContextLength int `json:"defaultContextLength"`
}
type ModelCapabilitiesResponse struct {

View File

@@ -1417,11 +1417,6 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) error {
settings.Models = envconfig.Models()
}
// set default context length if not set
if settings.ContextLength == 0 {
settings.ContextLength = 4096
}
// Include current runtime settings
settings.Agent = s.Agent
settings.Tools = s.Tools
@@ -1463,14 +1458,14 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
defer cancel()
serverInferenceComputes, err := server.GetInferenceComputer(ctx)
info, err := server.GetInferenceInfo(ctx)
if err != nil {
s.log().Error("failed to get inference compute", "error", err)
return fmt.Errorf("failed to get inference compute: %w", err)
s.log().Error("failed to get inference info", "error", err)
return fmt.Errorf("failed to get inference info: %w", err)
}
inferenceComputes := make([]responses.InferenceCompute, len(serverInferenceComputes))
for i, ic := range serverInferenceComputes {
inferenceComputes := make([]responses.InferenceCompute, len(info.Computes))
for i, ic := range info.Computes {
inferenceComputes[i] = responses.InferenceCompute{
Library: ic.Library,
Variant: ic.Variant,
@@ -1482,7 +1477,8 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
}
response := responses.InferenceComputeResponse{
InferenceComputes: inferenceComputes,
InferenceComputes: inferenceComputes,
DefaultContextLength: info.DefaultContextLength,
}
w.Header().Set("Content-Type", "application/json")
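The resulting wire format, as a sketch (top-level field names come from the json tags in this diff; assumes encoding/json is imported, and an empty compute slice is used for brevity):

b, _ := json.Marshal(responses.InferenceComputeResponse{
	InferenceComputes:    []responses.InferenceCompute{},
	DefaultContextLength: 262144,
})
// string(b) == `{"inferenceComputes":[],"defaultContextLength":262144}`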

View File

@@ -13,7 +13,6 @@ import (
"time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/progress"
"github.com/spf13/cobra"
)
@@ -50,14 +49,6 @@ var integrations = map[string]Runner{
"openclaw": &Openclaw{},
}
// recommendedModels are shown when the user has no models or as suggestions.
// Order matters: local models first, then cloud models.
var recommendedModels = []selectItem{
{Name: "glm-4.7-flash", Description: "Recommended (requires ~25GB VRAM)"},
{Name: "glm-4.7:cloud", Description: "recommended"},
{Name: "kimi-k2.5:cloud", Description: "recommended"},
}
// integrationAliases are hidden from the interactive selector but work as CLI arguments.
var integrationAliases = map[string]bool{
"clawdbot": true,
@@ -103,25 +94,62 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
return nil, err
}
var existing []modelInfo
for _, m := range models.Models {
existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
if len(models.Models) == 0 {
return nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
}
var items []selectItem
cloudModels := make(map[string]bool)
for _, m := range models.Models {
if m.RemoteModel != "" {
cloudModels[m.Name] = true
}
items = append(items, selectItem{Name: m.Name})
}
if len(items) == 0 {
return nil, fmt.Errorf("no local models available, run 'ollama pull <model>' first")
}
// Get previously configured models (saved config takes precedence)
var preChecked []string
if saved, err := loadIntegration(name); err == nil {
preChecked = saved.Models
} else if editor, ok := r.(Editor); ok {
preChecked = editor.Models()
}
items, preChecked, existingModels, cloudModels := buildModelList(existing, preChecked, current)
if len(items) == 0 {
return nil, fmt.Errorf("no models available")
checked := make(map[string]bool, len(preChecked))
for _, n := range preChecked {
checked[n] = true
}
// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
for _, item := range items {
if item.Name == current || strings.HasPrefix(item.Name, current+":") {
current = item.Name
break
}
}
// If current model is configured, move to front of preChecked
if checked[current] {
preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
}
// Sort: checked first, then alphabetical
slices.SortFunc(items, func(a, b selectItem) int {
ac, bc := checked[a.Name], checked[b.Name]
if ac != bc {
if ac {
return -1
}
return 1
}
return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
})
var selected []string
// only editors support multi-model selection
if _, ok := r.(Editor); ok {
selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
if err != nil {
@@ -135,27 +163,7 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
selected = []string{model}
}
var toPull []string
for _, m := range selected {
if !existingModels[m] {
toPull = append(toPull, m)
}
}
if len(toPull) > 0 {
msg := fmt.Sprintf("Download %s?", strings.Join(toPull, ", "))
if ok, err := confirmPrompt(msg); err != nil {
return nil, err
} else if !ok {
return nil, errCancelled
}
for _, m := range toPull {
fmt.Fprintf(os.Stderr, "\n")
if err := pullModel(ctx, client, m); err != nil {
return nil, fmt.Errorf("failed to pull %s: %w", m, err)
}
}
}
// if any model in selected is a cloud model, ensure signed in
var selectedCloudModels []string
for _, m := range selected {
if cloudModels[m] {
@@ -278,6 +286,7 @@ Examples:
return fmt.Errorf("unknown integration: %s", name)
}
// If launching without --model, use saved config if available
if !configFlag && modelFlag == "" {
if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
return runIntegration(name, config.Models[0])
@@ -286,6 +295,7 @@ Examples:
var models []string
if modelFlag != "" {
// When --model is specified, merge with existing models (new model becomes default)
models = []string{modelFlag}
if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
for _, m := range existing.Models {
@@ -354,150 +364,3 @@ Examples:
cmd.Flags().BoolVar(&configFlag, "config", false, "Configure without launching")
return cmd
}
type modelInfo struct {
Name string
Remote bool
}
// buildModelList merges existing models with recommendations, sorts them, and returns
// the ordered items along with maps of existing and cloud model names.
func buildModelList(existing []modelInfo, preChecked []string, current string) (items []selectItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
existingModels = make(map[string]bool)
cloudModels = make(map[string]bool)
recommended := make(map[string]bool)
var hasLocalModel, hasCloudModel bool
for _, rec := range recommendedModels {
recommended[rec.Name] = true
}
for _, m := range existing {
existingModels[m.Name] = true
if m.Remote {
cloudModels[m.Name] = true
hasCloudModel = true
} else {
hasLocalModel = true
}
displayName := strings.TrimSuffix(m.Name, ":latest")
existingModels[displayName] = true
item := selectItem{Name: displayName}
if recommended[displayName] {
item.Description = "recommended"
}
items = append(items, item)
}
for _, rec := range recommendedModels {
if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
continue
}
items = append(items, rec)
if isCloudModel(rec.Name) {
cloudModels[rec.Name] = true
}
}
checked := make(map[string]bool, len(preChecked))
for _, n := range preChecked {
checked[n] = true
}
// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
for _, item := range items {
if item.Name == current || strings.HasPrefix(item.Name, current+":") {
current = item.Name
break
}
}
if checked[current] {
preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
}
// Non-existing models get "install?" suffix and are pushed to the bottom.
// When the user has no models, preserve the recommended order.
notInstalled := make(map[string]bool)
for i := range items {
if !existingModels[items[i].Name] {
notInstalled[items[i].Name] = true
items[i].Description = "recommended, install?"
}
}
if hasLocalModel || hasCloudModel {
slices.SortStableFunc(items, func(a, b selectItem) int {
ac, bc := checked[a.Name], checked[b.Name]
aNew, bNew := notInstalled[a.Name], notInstalled[b.Name]
if ac != bc {
if ac {
return -1
}
return 1
}
if !ac && !bc && aNew != bNew {
if aNew {
return 1
}
return -1
}
return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
})
}
return items, preChecked, existingModels, cloudModels
}
func isCloudModel(name string) bool {
return strings.HasSuffix(name, ":cloud")
}
func pullModel(ctx context.Context, client *api.Client, model string) error {
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
fn := func(resp api.ProgressResponse) error {
if resp.Digest != "" {
if resp.Completed == 0 {
return nil
}
if spinner != nil {
spinner.Stop()
}
bar, ok := bars[resp.Digest]
if !ok {
name, isDigest := strings.CutPrefix(resp.Digest, "sha256:")
name = strings.TrimSpace(name)
if isDigest {
name = name[:min(12, len(name))]
}
bar = progress.NewBar(fmt.Sprintf("pulling %s:", name), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PullRequest{Name: model}
return client.Pull(ctx, &request, fn)
}

View File

@@ -5,7 +5,6 @@ import (
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/spf13/cobra"
)
@@ -175,226 +174,15 @@ func TestLaunchCmd_NilHeartbeat(t *testing.T) {
func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
for name, r := range integrations {
t.Run(name, func(t *testing.T) {
// Test String() doesn't panic and returns non-empty
displayName := r.String()
if displayName == "" {
t.Error("String() should not return empty")
}
// Test Run() exists (we can't call it without actually running the command)
// Just verify the method is available
var _ func(string) error = r.Run
})
}
}
func TestIsCloudModel(t *testing.T) {
tests := []struct {
name string
want bool
}{
{"glm-4.7:cloud", true},
{"kimi-k2.5:cloud", true},
{"glm-4.7-flash", false},
{"glm-4.7-flash:latest", false},
{"cloud-model", false},
{"model:cloudish", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isCloudModel(tt.name); got != tt.want {
t.Errorf("isCloudModel(%q) = %v, want %v", tt.name, got, tt.want)
}
})
}
}
func names(items []selectItem) []string {
var out []string
for _, item := range items {
out = append(out, item.Name)
}
return out
}
func TestBuildModelList_NoExistingModels(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
want := []string{"glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
if diff := cmp.Diff(want, names(items)); diff != "" {
t.Errorf("with no existing models, items should be recommended in order (-want +got):\n%s", diff)
}
for _, item := range items {
if item.Description != "recommended, install?" {
t.Errorf("item %q should have description 'install?', got %q", item.Name, item.Description)
}
}
}
func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "qwen2.5:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
got := names(items)
want := []string{"llama3.2", "qwen2.5", "glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("cloud recs should be at bottom (-want +got):\n%s", diff)
}
}
func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "glm-4.7:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
got := names(items)
want := []string{"glm-4.7:cloud", "llama3.2", "glm-4.7-flash", "kimi-k2.5:cloud"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("mixed models should be alphabetical (-want +got):\n%s", diff)
}
}
func TestBuildModelList_PreCheckedFirst(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "glm-4.7:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
got := names(items)
if got[0] != "llama3.2" {
t.Errorf("pre-checked model should be first, got %v", got)
}
}
func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
existing := []modelInfo{
{Name: "glm-4.7-flash", Remote: false},
{Name: "glm-4.7:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
for _, item := range items {
switch item.Name {
case "glm-4.7-flash", "glm-4.7:cloud":
if item.Description != "recommended" {
t.Errorf("installed recommended %q should have description 'recommended', got %q", item.Name, item.Description)
}
case "kimi-k2.5:cloud":
if item.Description != "recommended, install?" {
t.Errorf("non-installed recommended %q should have description 'install?', got %q", item.Name, item.Description)
}
}
}
}
func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
existing := []modelInfo{
{Name: "glm-4.7-flash", Remote: false},
{Name: "glm-4.7:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
got := names(items)
// glm-4.7-flash and glm-4.7:cloud are installed so they sort normally;
// kimi-k2.5:cloud and qwen3:0.6b are not installed so they go to the bottom
want := []string{"glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("existing cloud models should sort normally (-want +got):\n%s", diff)
}
}
func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "kimi-k2.5:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
got := names(items)
// kimi-k2.5:cloud is installed so it sorts normally;
// the rest of the recommendations are not installed so they go to the bottom
want := []string{"kimi-k2.5:cloud", "llama3.2", "glm-4.7-flash", "glm-4.7:cloud"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("only non-installed models should be at bottom (-want +got):\n%s", diff)
}
// Non-installed models should have "recommended, install?" description
for _, item := range items {
if !slices.Contains([]string{"kimi-k2.5:cloud", "llama3.2"}, item.Name) {
if item.Description != "recommended, install?" {
t.Errorf("non-installed %q should have description 'install?', got %q", item.Name, item.Description)
}
}
}
}
func TestBuildModelList_LatestTagStripped(t *testing.T) {
existing := []modelInfo{
{Name: "glm-4.7-flash:latest", Remote: false},
{Name: "llama3.2:latest", Remote: false},
}
items, _, existingModels, _ := buildModelList(existing, nil, "")
got := names(items)
// :latest should be stripped from display names
for _, name := range got {
if strings.HasSuffix(name, ":latest") {
t.Errorf("name %q should not have :latest suffix", name)
}
}
// glm-4.7-flash should not be duplicated (existing :latest matches the recommendation)
count := 0
for _, name := range got {
if name == "glm-4.7-flash" {
count++
}
}
if count != 1 {
t.Errorf("glm-4.7-flash should appear exactly once, got %d in %v", count, got)
}
// Stripped name should be in existingModels so it won't be pulled
if !existingModels["glm-4.7-flash"] {
t.Error("glm-4.7-flash should be in existingModels")
}
}
func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "glm-4.7:cloud", Remote: true},
}
_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
if !existingModels["llama3.2"] {
t.Error("llama3.2 should be in existingModels")
}
if !existingModels["glm-4.7:cloud"] {
t.Error("glm-4.7:cloud should be in existingModels")
}
if existingModels["glm-4.7-flash"] {
t.Error("glm-4.7-flash should not be in existingModels (it's a recommendation)")
}
if !cloudModels["glm-4.7:cloud"] {
t.Error("glm-4.7:cloud should be in cloudModels")
}
if !cloudModels["kimi-k2.5:cloud"] {
t.Error("kimi-k2.5:cloud should be in cloudModels (recommended cloud)")
}
if cloudModels["llama3.2"] {
t.Error("llama3.2 should not be in cloudModels")
}
}

View File

@@ -353,15 +353,10 @@ func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
suffix = " " + ansiGray + "(default)" + ansiReset
}
desc := ""
if item.Description != "" {
desc = " " + ansiGray + "- " + item.Description + ansiReset
}
if idx == s.highlighted && !s.focusOnButton {
fmt.Fprintf(w, " %s%s %s %s%s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, desc, suffix)
fmt.Fprintf(w, " %s%s %s %s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, suffix)
} else {
fmt.Fprintf(w, " %s %s %s%s%s\r\n", prefix, checkbox, item.Name, desc, suffix)
fmt.Fprintf(w, " %s %s %s%s\r\n", prefix, checkbox, item.Name, suffix)
}
lineCount++
}