mirror of https://github.com/ollama/ollama.git
synced 2026-02-02 19:54:33 -05:00

Compare commits: 3 commits (ollama-new...brucemacd/)

- c0496e6125
- 2d57bcbc64
- 060f9341c0
@@ -358,7 +358,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Odin Runes](https://github.com/leonid20000/OdinRunes)
- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
- [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Screenpipe](https://github.com/mediar-ai/screenpipe) (24/7 screen & mic recording with AI-powered search, uses Ollama for local LLM features)
- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
- [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)

@@ -466,7 +465,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Clueless](https://github.com/KashyapTan/clueless) (Open Source & Local Cluely: A desktop application LLM assistant to help you talk to anything on your screen using locally served Ollama models. Also undetectable to screenshare)
- [ollama-co2](https://github.com/carbonatedWaterOrg/ollama-co2) (FastAPI web interface for monitoring and managing local and remote Ollama servers with real-time model monitoring and concurrent downloads)
- [Hillnote](https://hillnote.com) (A Markdown-first workspace designed to supercharge your AI workflow. Create documents ready to integrate with Claude, ChatGPT, Gemini, Cursor, and more - all while keeping your work on your device.)
- [Stakpak](https://github.com/stakpak/agent) (An open source, vendor neutral DevOps agent that works with any model, and any stack, for teams who just want to ship)

### Cloud
api/types.go

@@ -912,6 +912,19 @@ type UserResponse struct {
	Plan string `json:"plan,omitempty"`
}

type UsageResponse struct {
	// Start is the time the server started tracking usage (UTC, RFC 3339).
	Start time.Time        `json:"start"`
	Usage []ModelUsageData `json:"usage"`
}

type ModelUsageData struct {
	Model            string `json:"model"`
	Requests         int64  `json:"requests"`
	PromptTokens     int64  `json:"prompt_tokens"`
	CompletionTokens int64  `json:"completion_tokens"`
}

// Tensor describes the metadata for a given tensor.
type Tensor struct {
	Name string `json:"name"`
@@ -41,11 +41,6 @@ type InferenceCompute struct {
	VRAM string
}

type InferenceInfo struct {
	Computes             []InferenceCompute
	DefaultContextLength int
}

func New(s *store.Store, devMode bool) *Server {
	p := resolvePath("ollama")
	return &Server{store: s, bin: p, dev: devMode}

@@ -267,12 +262,9 @@ func openRotatingLog() (io.WriteCloser, error) {

// Attempt to retrieve inference compute information from the server
// log. Set ctx to timeout to control how long to wait for the logs to appear
func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
	info := &InferenceInfo{}
	computeMarker := regexp.MustCompile(`inference compute.*library=`)
	defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
	defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)
func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
	inference := []InferenceCompute{}
	marker := regexp.MustCompile(`inference compute.*library=`)
	q := `inference compute.*%s=["]([^"]*)["]`
	nq := `inference compute.*%s=(\S+)\s`
	type regex struct {

@@ -338,8 +330,8 @@ func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		// Check for inference compute lines
		if computeMarker.MatchString(line) {
		match := marker.FindStringSubmatch(line)
		if len(match) > 0 {
			ic := InferenceCompute{
				Library: get("library", line),
				Variant: get("variant", line),

@@ -350,25 +342,12 @@ func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
			}

			slog.Info("Matched", "inference compute", ic)
			info.Computes = append(info.Computes, ic)
			continue
		}
		// Check for default context length line
		if defaultCtxMarker.MatchString(line) {
			match := defaultCtxRegex.FindStringSubmatch(line)
			if len(match) > 1 {
				numCtx, err := strconv.Atoi(match[1])
				if err == nil {
					info.DefaultContextLength = numCtx
					slog.Info("Matched default context length", "default_num_ctx", numCtx)
				}
			inference = append(inference, ic)
		} else {
			// Break out on first non matching line after we start matching
			if len(inference) > 0 {
				return inference, nil
			}
			return info, nil
		}
		// If we've found compute info but hit a non-matching line, return what we have
		// This handles older server versions that don't log the default context line
		if len(info.Computes) > 0 {
			return info, nil
		}
	}
	time.Sleep(100 * time.Millisecond)
@@ -136,50 +136,44 @@ func TestServerCmd(t *testing.T) {
	}
}

func TestGetInferenceInfo(t *testing.T) {
func TestGetInferenceComputer(t *testing.T) {
	tests := []struct {
		name             string
		log              string
		expComputes      []InferenceCompute
		expDefaultCtxLen int
		name string
		log  string
		exp  []InferenceCompute
	}{
		{
			name: "metal",
			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144
time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
`,
			expComputes: []InferenceCompute{{
			exp: []InferenceCompute{{
				Library: "metal",
				Driver:  "0.0",
				VRAM:    "96.0 GiB",
			}},
			expDefaultCtxLen: 262144,
		},
		{
			name: "cpu",
			log: `time=2025-07-01T17:59:51.470Z level=INFO source=gpu.go:377 msg="no compatible GPUs were discovered"
time=2025-07-01T17:59:51.470Z level=INFO source=types.go:130 msg="inference compute" id=0 library=cpu variant="" compute="" driver=0.0 name="" total="31.3 GiB" available="30.4 GiB"
time=2025-07-01T17:59:51.471Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="31.3 GiB" default_num_ctx=32768
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
			expComputes: []InferenceCompute{{
			exp: []InferenceCompute{{
				Library: "cpu",
				Driver:  "0.0",
				VRAM:    "31.3 GiB",
			}},
			expDefaultCtxLen: 32768,
		},
		{
			name: "cuda1",
			log: `time=2025-07-01T19:33:43.162Z level=DEBUG source=amd_linux.go:419 msg="amdgpu driver not detected /sys/module/amdgpu"
releasing cuda driver library
time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference compute" id=GPU-452cac9f-6960-839c-4fb3-0cec83699196 library=cuda variant=v12 compute=6.1 driver=12.7 name="NVIDIA GeForce GT 1030" total="3.9 GiB" available="3.9 GiB"
time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="3.9 GiB" default_num_ctx=4096
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
			expComputes: []InferenceCompute{{
			exp: []InferenceCompute{{
				Library: "cuda",
				Variant: "v12",
				Compute: "6.1",
@@ -187,7 +181,6 @@ time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based d
				Name:    "NVIDIA GeForce GT 1030",
				VRAM:    "3.9 GiB",
			}},
			expDefaultCtxLen: 4096,
		},
		{
			name: "frank",
@@ -195,10 +188,9 @@ time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based d
releasing cuda driver library
time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-d6de3398-9932-6902-11ec-fee8e424c8a2 library=cuda variant=v12 compute=7.5 driver=12.8 name="NVIDIA GeForce RTX 2080 Ti" total="10.6 GiB" available="10.4 GiB"
time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-9abb57639fa80c50 library=rocm variant="" compute=gfx1030 driver=6.3 name=1002:73bf total="16.0 GiB" available="1.3 GiB"
time=2025-07-01T19:36:13.316Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="26.6 GiB" default_num_ctx=32768
[GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
`,
			expComputes: []InferenceCompute{
			exp: []InferenceCompute{
				{
					Library: "cuda",
					Variant: "v12",
@@ -215,20 +207,6 @@ time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based d
					VRAM: "16.0 GiB",
				},
			},
			expDefaultCtxLen: 32768,
		},
		{
			name: "missing_default_context",
			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
`,
			expComputes: []InferenceCompute{{
				Library: "metal",
				Driver:  "0.0",
				VRAM:    "96.0 GiB",
			}},
			expDefaultCtxLen: 0, // No default context line, should return 0
		},
	}
	for _, tt := range tests {
@@ -241,21 +219,18 @@ time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not f
		}
		ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
		defer cancel()
		info, err := GetInferenceInfo(ctx)
		ics, err := GetInferenceComputer(ctx)
		if err != nil {
			t.Fatalf("failed to get inference info: %v", err)
			t.Fatalf(" failed to get inference compute: %v", err)
		}
		if !reflect.DeepEqual(info.Computes, tt.expComputes) {
			t.Fatalf("computes mismatch\ngot:\n%#v\nwant:\n%#v", info.Computes, tt.expComputes)
		}
		if info.DefaultContextLength != tt.expDefaultCtxLen {
			t.Fatalf("default context length mismatch: got %d, want %d", info.DefaultContextLength, tt.expDefaultCtxLen)
		if !reflect.DeepEqual(ics, tt.exp) {
			t.Fatalf("got:\n%#v\nwant:\n%#v", ics, tt.exp)
		}
		})
	}
}

func TestGetInferenceInfoTimeout(t *testing.T) {
func TestGetInferenceComputerTimeout(t *testing.T) {
	ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
	defer cancel()
	tmpDir := t.TempDir()
@@ -264,7 +239,7 @@ func TestGetInferenceInfoTimeout(t *testing.T) {
	if err != nil {
		t.Fatalf("failed to write log file %s: %s", serverLogPath, err)
	}
	_, err = GetInferenceInfo(ctx)
	_, err = GetInferenceComputer(ctx)
	if err == nil {
		t.Fatal("expected timeout")
	}
@@ -14,7 +14,7 @@ import (

// currentSchemaVersion defines the current database schema version.
// Increment this when making schema changes that require migrations.
const currentSchemaVersion = 13
const currentSchemaVersion = 12

// database wraps the SQLite connection.
// SQLite handles its own locking for concurrent access:

@@ -73,7 +73,7 @@ func (db *database) init() error {
	agent BOOLEAN NOT NULL DEFAULT 0,
	tools BOOLEAN NOT NULL DEFAULT 0,
	working_dir TEXT NOT NULL DEFAULT '',
	context_length INTEGER NOT NULL DEFAULT 0,
	context_length INTEGER NOT NULL DEFAULT 4096,
	window_width INTEGER NOT NULL DEFAULT 0,
	window_height INTEGER NOT NULL DEFAULT 0,
	config_migrated BOOLEAN NOT NULL DEFAULT 0,

@@ -244,12 +244,6 @@ func (db *database) migrate() error {
			return fmt.Errorf("migrate v11 to v12: %w", err)
		}
		version = 12
	case 12:
		// change default context_length from 4096 to 0 (VRAM-based tiered defaults)
		if err := db.migrateV12ToV13(); err != nil {
			return fmt.Errorf("migrate v12 to v13: %w", err)
		}
		version = 13
	default:
		// If we have a version we don't recognize, just set it to current
		// This might happen during development

@@ -458,23 +452,6 @@ func (db *database) migrateV11ToV12() error {
	return nil
}

// migrateV12ToV13 changes the default context_length from 4096 to 0
// When context_length is 0, the ollama server uses VRAM-based tiered defaults
func (db *database) migrateV12ToV13() error {
	// Update users who have the old default of 4096 to the new default of 0
	_, err := db.conn.Exec(`UPDATE settings SET context_length = 0 WHERE context_length = 4096`)
	if err != nil {
		return fmt.Errorf("update context_length default: %w", err)
	}

	_, err = db.conn.Exec(`UPDATE settings SET schema_version = 13`)
	if err != nil {
		return fmt.Errorf("update schema version: %w", err)
	}

	return nil
}

// cleanupOrphanedData removes orphaned records that may exist due to the foreign key bug
func (db *database) cleanupOrphanedData() error {
	_, err := db.conn.Exec(`
app/store/testdata/schema.sql (vendored)

@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS settings (
	agent BOOLEAN NOT NULL DEFAULT 0,
	tools BOOLEAN NOT NULL DEFAULT 0,
	working_dir TEXT NOT NULL DEFAULT '',
	context_length INTEGER NOT NULL DEFAULT 0,
	context_length INTEGER NOT NULL DEFAULT 4096,
	window_width INTEGER NOT NULL DEFAULT 0,
	window_height INTEGER NOT NULL DEFAULT 0,
	config_migrated BOOLEAN NOT NULL DEFAULT 0,
@@ -289,12 +289,10 @@ export class InferenceCompute {
}
export class InferenceComputeResponse {
	inferenceComputes: InferenceCompute[];
	defaultContextLength: number;

	constructor(source: any = {}) {
		if ('string' === typeof source) source = JSON.parse(source);
		this.inferenceComputes = this.convertValues(source["inferenceComputes"], InferenceCompute);
		this.defaultContextLength = source["defaultContextLength"];
	}

	convertValues(a: any, classs: any, asMap: boolean = false): any {
@@ -4,6 +4,7 @@ import {
	ChatEvent,
	DownloadEvent,
	ErrorEvent,
	InferenceCompute,
	InferenceComputeResponse,
	ModelCapabilitiesResponse,
	Model,

@@ -378,7 +379,7 @@ export async function* pullModel(
	}
}

export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
export async function getInferenceCompute(): Promise<InferenceCompute[]> {
	const response = await fetch(`${API_BASE}/api/v1/inference-compute`);
	if (!response.ok) {
		throw new Error(

@@ -387,7 +388,8 @@ export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
	}

	const data = await response.json();
	return new InferenceComputeResponse(data);
	const inferenceComputeResponse = new InferenceComputeResponse(data);
	return inferenceComputeResponse.inferenceComputes || [];
}

export async function fetchHealth(): Promise<boolean> {
@@ -19,7 +19,7 @@ import { Settings as SettingsType } from "@/gotypes";
import { useNavigate } from "@tanstack/react-router";
import { useUser } from "@/hooks/useUser";
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
import { getSettings, updateSettings, getInferenceCompute } from "@/api";
import { getSettings, updateSettings } from "@/api";

function AnimatedDots() {
	return (

@@ -65,13 +65,6 @@ export default function Settings() {

	const settings = settingsData?.settings || null;

	const { data: inferenceComputeResponse } = useQuery({
		queryKey: ["inferenceCompute"],
		queryFn: getInferenceCompute,
	});

	const defaultContextLength = inferenceComputeResponse?.defaultContextLength;

	const updateSettingsMutation = useMutation({
		mutationFn: updateSettings,
		onSuccess: () => {

@@ -155,7 +148,7 @@ export default function Settings() {
		Models: "",
		Agent: false,
		Tools: false,
		ContextLength: 0,
		ContextLength: 4096,
		AirplaneMode: false,
	});
	updateSettingsMutation.mutate(defaultSettings);

@@ -426,11 +419,13 @@ export default function Settings() {
	</Description>
	<div className="mt-3">
		<Slider
			value={settings.ContextLength || defaultContextLength || 0}
			value={(() => {
				// Otherwise use the settings value
				return settings.ContextLength || 4096;
			})()}
			onChange={(value) => {
				handleChange("ContextLength", value);
			}}
			disabled={!defaultContextLength}
			options={[
				{ value: 4096, label: "4k" },
				{ value: 8192, label: "8k" },

@@ -445,7 +440,6 @@ export default function Settings() {
			</div>
		</div>
	</Field>

	{/* Airplane Mode */}
	<Field>
		<div className="flex items-start justify-between gap-4">
@@ -6,11 +6,10 @@ export interface SliderProps {
	value?: number;
	onChange?: (value: number) => void;
	className?: string;
	disabled?: boolean;
}

const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
	({ label, options, value = 0, onChange, disabled = false }, ref) => {
	({ label, options, value = 0, onChange }, ref) => {
		const [selectedValue, setSelectedValue] = React.useState(value);
		const [isDragging, setIsDragging] = React.useState(false);
		const containerRef = React.useRef<HTMLDivElement>(null);

@@ -21,7 +20,6 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
		}, [value]);

		const handleClick = (optionValue: number) => {
			if (disabled) return;
			setSelectedValue(optionValue);
			onChange?.(optionValue);
		};

@@ -41,7 +39,6 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
		};

		const handleMouseDown = (e: React.MouseEvent) => {
			if (disabled) return;
			setIsDragging(true);
			e.preventDefault();
		};

@@ -80,7 +77,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
		}

		return (
			<div className={`space-y-2 ${disabled ? "opacity-50" : ""}`} ref={ref}>
			<div className="space-y-2" ref={ref}>
				{label && <label className="text-sm font-medium">{label}</label>}
				<div className="relative">
					<div className="absolute top-[9px] left-2 right-2 h-1 bg-neutral-200 dark:bg-neutral-700 pointer-events-none rounded-full" />

@@ -91,11 +88,10 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
					<button
						onClick={() => handleClick(option.value)}
						onMouseDown={handleMouseDown}
						disabled={disabled}
						className={`relative px-3 py-6 -mx-3 -my-6 z-10 ${disabled ? "cursor-not-allowed" : "cursor-pointer"}`}
						className="relative px-3 py-6 -mx-3 -my-6 z-10 cursor-pointer"
					>
						<div className="relative w-5 h-5 flex items-center justify-center">
							{selectedValue === option.value && !disabled && (
							{selectedValue === option.value && (
								<div className="w-4 h-4 bg-white dark:bg-white border border-neutral-400 dark:border-neutral-500 rounded-full cursor-grab active:cursor-grabbing" />
							)}
						</div>
@@ -26,14 +26,12 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
	currentChatId && currentChatId !== "new" ? currentChatId : "",
);

const { data: inferenceComputeResponse } = useQuery({
	queryKey: ["inferenceCompute"],
const { data: inferenceComputes = [] } = useQuery({
	queryKey: ["inference-compute"],
	queryFn: getInferenceCompute,
	enabled: !settings.selectedModel, // Only fetch if no model is selected
});

const inferenceComputes = inferenceComputeResponse?.inferenceComputes || [];

const totalVRAM = useMemo(
	() => getTotalVRAM(inferenceComputes),
	[inferenceComputes],
@@ -45,8 +45,7 @@ type InferenceCompute struct {
}

type InferenceComputeResponse struct {
	InferenceComputes    []InferenceCompute `json:"inferenceComputes"`
	DefaultContextLength int                `json:"defaultContextLength"`
	InferenceComputes []InferenceCompute `json:"inferenceComputes"`
}

type ModelCapabilitiesResponse struct {
app/ui/ui.go

@@ -1417,6 +1417,11 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) error {
	settings.Models = envconfig.Models()
}

// set default context length if not set
if settings.ContextLength == 0 {
	settings.ContextLength = 4096
}

// Include current runtime settings
settings.Agent = s.Agent
settings.Tools = s.Tools

@@ -1458,14 +1463,14 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
	ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
	defer cancel()
	info, err := server.GetInferenceInfo(ctx)
	serverInferenceComputes, err := server.GetInferenceComputer(ctx)
	if err != nil {
		s.log().Error("failed to get inference info", "error", err)
		return fmt.Errorf("failed to get inference info: %w", err)
		s.log().Error("failed to get inference compute", "error", err)
		return fmt.Errorf("failed to get inference compute: %w", err)
	}

	inferenceComputes := make([]responses.InferenceCompute, len(info.Computes))
	for i, ic := range info.Computes {
	inferenceComputes := make([]responses.InferenceCompute, len(serverInferenceComputes))
	for i, ic := range serverInferenceComputes {
		inferenceComputes[i] = responses.InferenceCompute{
			Library: ic.Library,
			Variant: ic.Variant,

@@ -1477,8 +1482,7 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
	}

	response := responses.InferenceComputeResponse{
		InferenceComputes:    inferenceComputes,
		DefaultContextLength: info.DefaultContextLength,
		InferenceComputes: inferenceComputes,
	}

	w.Header().Set("Content-Type", "application/json")

@@ -1888,7 +1888,7 @@ func NewCLI() *cobra.Command {
	serveCmd := &cobra.Command{
		Use:     "serve",
		Aliases: []string{"start"},
		Short:   "Start Ollama",
		Short:   "Start ollama",
		Args:    cobra.ExactArgs(0),
		RunE:    RunServer,
	}
@@ -13,46 +13,26 @@ import (
	"github.com/ollama/ollama/envconfig"
)

type Openclaw struct{}
type Clawdbot struct{}

func (c *Openclaw) String() string { return "OpenClaw" }
func (c *Clawdbot) String() string { return "Clawdbot" }

const ansiGreen = "\033[32m"

func (c *Openclaw) Run(model string) error {
	bin := "openclaw"
	if _, err := exec.LookPath(bin); err != nil {
		bin = "clawdbot"
		if _, err := exec.LookPath(bin); err != nil {
			return fmt.Errorf("openclaw is not installed, install from https://docs.openclaw.ai")
		}
func (c *Clawdbot) Run(model string) error {
	if _, err := exec.LookPath("clawdbot"); err != nil {
		return fmt.Errorf("clawdbot is not installed, install from https://docs.clawd.bot")
	}

	models := []string{model}
	if config, err := loadIntegration("openclaw"); err == nil && len(config.Models) > 0 {
		models = config.Models
	} else if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
	if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
		models = config.Models
	}
	if err := c.Edit(models); err != nil {
		return fmt.Errorf("setup failed: %w", err)
	}

	if !c.onboarded() {
		// Onboarding not completed: run it (model already set via Edit)
		// Use "ollama" as gateway token for simple local access
		cmd := exec.Command(bin, "onboard",
			"--auth-choice", "skip",
			"--gateway-token", "ollama",
		)
		cmd.Stdin = os.Stdin
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		return cmd.Run()
	}

	// Onboarding completed: run gateway
	cmd := exec.Command(bin, "gateway")
	cmd := exec.Command("clawdbot", "gateway")
	cmd.Stdin = os.Stdin

	// Capture output to detect "already running" message

@@ -62,55 +42,22 @@ func (c *Openclaw) Run(model string) error {

	err := cmd.Run()
	if err != nil && strings.Contains(outputBuf.String(), "Gateway already running") {
		fmt.Fprintf(os.Stderr, "%sOpenClaw has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
		fmt.Fprintf(os.Stderr, "%sClawdbot has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
		return nil
	}
	return err
}

// onboarded checks if OpenClaw onboarding wizard was completed
// by looking for the wizard.lastRunAt marker in the config
func (c *Openclaw) onboarded() bool {
	home, err := os.UserHomeDir()
	if err != nil {
		return false
	}

	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")

	config := make(map[string]any)
	if data, err := os.ReadFile(configPath); err == nil {
		_ = json.Unmarshal(data, &config)
	} else if data, err := os.ReadFile(legacyPath); err == nil {
		_ = json.Unmarshal(data, &config)
	} else {
		return false
	}

	// Check for wizard.lastRunAt marker (set when onboarding completes)
	wizard, _ := config["wizard"].(map[string]any)
	if wizard == nil {
		return false
	}
	lastRunAt, _ := wizard["lastRunAt"].(string)
	return lastRunAt != ""
}

func (c *Openclaw) Paths() []string {
func (c *Clawdbot) Paths() []string {
	home, _ := os.UserHomeDir()
	p := filepath.Join(home, ".openclaw", "openclaw.json")
	p := filepath.Join(home, ".clawdbot", "clawdbot.json")
	if _, err := os.Stat(p); err == nil {
		return []string{p}
	}
	legacy := filepath.Join(home, ".clawdbot", "clawdbot.json")
	if _, err := os.Stat(legacy); err == nil {
		return []string{legacy}
	}
	return nil
}

func (c *Openclaw) Edit(models []string) error {
func (c *Clawdbot) Edit(models []string) error {
	if len(models) == 0 {
		return nil
	}

@@ -120,8 +67,7 @@ func (c *Openclaw) Edit(models []string) error {
		return err
	}

	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
	configPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
		return err
	}

@@ -130,8 +76,6 @@ func (c *Openclaw) Edit(models []string) error {
	config := make(map[string]any)
	if data, err := os.ReadFile(configPath); err == nil {
		_ = json.Unmarshal(data, &config)
	} else if data, err := os.ReadFile(legacyPath); err == nil {
		_ = json.Unmarshal(data, &config)
	}

	// Navigate/create: models.providers.ollama (preserving other providers)

@@ -223,18 +167,15 @@ func (c *Openclaw) Edit(models []string) error {
	return writeWithBackup(configPath, data)
}

func (c *Openclaw) Models() []string {
func (c *Clawdbot) Models() []string {
	home, err := os.UserHomeDir()
	if err != nil {
		return nil
	}

	config, err := readJSONFile(filepath.Join(home, ".openclaw", "openclaw.json"))
	config, err := readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
	if err != nil {
		config, err = readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
		if err != nil {
			return nil
		}
		return nil
	}

	modelsSection, _ := config["models"].(map[string]any)
@@ -8,12 +8,12 @@ import (
	"testing"
)

func TestOpenclawIntegration(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotIntegration(t *testing.T) {
	c := &Clawdbot{}

	t.Run("String", func(t *testing.T) {
		if got := c.String(); got != "OpenClaw" {
			t.Errorf("String() = %q, want %q", got, "OpenClaw")
		if got := c.String(); got != "Clawdbot" {
			t.Errorf("String() = %q, want %q", got, "Clawdbot")
		}
	})

@@ -26,13 +26,13 @@ func TestOpenclawIntegration(t *testing.T) {
	})
}

func TestOpenclawEdit(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)

	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")

	cleanup := func() { os.RemoveAll(configDir) }

@@ -41,8 +41,8 @@ func TestOpenclawEdit(t *testing.T) {
		if err := c.Edit([]string{"llama3.2"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "llama3.2")
		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
		assertClawdbotModelExists(t, configPath, "llama3.2")
		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
	})

	t.Run("multiple models - first is primary", func(t *testing.T) {
@@ -50,9 +50,9 @@ func TestOpenclawEdit(t *testing.T) {
		if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "llama3.2")
		assertOpenclawModelExists(t, configPath, "mistral")
		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
		assertClawdbotModelExists(t, configPath, "llama3.2")
		assertClawdbotModelExists(t, configPath, "mistral")
		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
	})

	t.Run("preserve other providers", func(t *testing.T) {
@@ -127,8 +127,8 @@ func TestOpenclawEdit(t *testing.T) {
		c.Edit([]string{"llama3.2", "mistral"})
		c.Edit([]string{"llama3.2"})

		assertOpenclawModelExists(t, configPath, "llama3.2")
		assertOpenclawModelNotExists(t, configPath, "mistral")
		assertClawdbotModelExists(t, configPath, "llama3.2")
		assertClawdbotModelNotExists(t, configPath, "mistral")
	})

	t.Run("empty models is no-op", func(t *testing.T) {
@@ -169,12 +169,12 @@ func TestOpenclawEdit(t *testing.T) {
		if err := c.Edit([]string{"llama3.2"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "llama3.2")
		assertClawdbotModelExists(t, configPath, "llama3.2")
	})
}

func TestOpenclawModels(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotModels(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)

@@ -185,9 +185,9 @@ func TestOpenclawModels(t *testing.T) {
	})

	t.Run("returns all ollama models", func(t *testing.T) {
		configDir := filepath.Join(tmpDir, ".openclaw")
		configDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
		os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{
			"models":{"providers":{"ollama":{"models":[
				{"id":"llama3.2"},
				{"id":"mistral"}
@@ -202,7 +202,7 @@ func TestOpenclawModels(t *testing.T) {
}

// Helper functions
func assertOpenclawModelExists(t *testing.T, path, model string) {
func assertClawdbotModelExists(t *testing.T, path, model string) {
	t.Helper()
	data, _ := os.ReadFile(path)
	var cfg map[string]any
@@ -221,7 +221,7 @@ func assertOpenclawModelExists(t *testing.T, path, model string) {
	t.Errorf("model %s not found", model)
}

func assertOpenclawModelNotExists(t *testing.T, path, model string) {
func assertClawdbotModelNotExists(t *testing.T, path, model string) {
	t.Helper()
	data, _ := os.ReadFile(path)
	var cfg map[string]any
@@ -239,7 +239,7 @@ func assertOpenclawModelNotExists(t *testing.T, path, model string) {
	}
}

func assertOpenclawPrimaryModel(t *testing.T, path, expected string) {
func assertClawdbotPrimaryModel(t *testing.T, path, expected string) {
	t.Helper()
	data, _ := os.ReadFile(path)
	var cfg map[string]any
@@ -252,15 +252,15 @@ func assertOpenclawPrimaryModel(t *testing.T, path, expected string) {
	}
}

func TestOpenclawPaths(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotPaths(t *testing.T) {
	c := &Clawdbot{}

	t.Run("returns path when config exists", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		configDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{}`), 0o644)
		os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{}`), 0o644)

		paths := c.Paths()
		if len(paths) != 1 {
@@ -277,12 +277,12 @@ func TestOpenclawPaths(t *testing.T) {
	})
}

func TestOpenclawModelsEdgeCases(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotModelsEdgeCases(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")
	cleanup := func() { os.RemoveAll(configDir) }

	t.Run("corrupted JSON returns nil", func(t *testing.T) {
@@ -340,11 +340,11 @@ func TestOpenclawModelsEdgeCases(t *testing.T) {
	})
}

func TestOpenclawEditSchemaFields(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEditSchemaFields(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
	configPath := filepath.Join(tmpDir, ".clawdbot", "clawdbot.json")

	if err := c.Edit([]string{"llama3.2"}); err != nil {
		t.Fatal(err)
@@ -381,20 +381,20 @@ func TestOpenclawEditSchemaFields(t *testing.T) {
	}
}

func TestOpenclawEditModelNames(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEditModelNames(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
	cleanup := func() { os.RemoveAll(filepath.Join(tmpDir, ".openclaw")) }
	configPath := filepath.Join(tmpDir, ".clawdbot", "clawdbot.json")
	cleanup := func() { os.RemoveAll(filepath.Join(tmpDir, ".clawdbot")) }

	t.Run("model with colon tag", func(t *testing.T) {
		cleanup()
		if err := c.Edit([]string{"llama3.2:70b"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "llama3.2:70b")
		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2:70b")
		assertClawdbotModelExists(t, configPath, "llama3.2:70b")
		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2:70b")
	})

	t.Run("model with slash", func(t *testing.T) {
@@ -402,8 +402,8 @@ func TestOpenclawEditModelNames(t *testing.T) {
		if err := c.Edit([]string{"library/model:tag"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "library/model:tag")
		assertOpenclawPrimaryModel(t, configPath, "ollama/library/model:tag")
		assertClawdbotModelExists(t, configPath, "library/model:tag")
		assertClawdbotPrimaryModel(t, configPath, "ollama/library/model:tag")
	})

	t.Run("model with hyphen", func(t *testing.T) {
@@ -411,16 +411,16 @@ func TestOpenclawEditModelNames(t *testing.T) {
		if err := c.Edit([]string{"test-model"}); err != nil {
			t.Fatal(err)
		}
		assertOpenclawModelExists(t, configPath, "test-model")
		assertClawdbotModelExists(t, configPath, "test-model")
	})
}

func TestOpenclawEditAgentsPreservation(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEditAgentsPreservation(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")
	cleanup := func() { os.RemoveAll(configDir) }

	t.Run("preserve other agent defaults", func(t *testing.T) {
@@ -457,7 +457,7 @@ func TestOpenclawEditAgentsPreservation(t *testing.T) {
	})
}

const testOpenclawFixture = `{
const testClawdbotFixture = `{
	"theme": "dark",
	"mcp": {"servers": {"custom": {"enabled": true}}},
	"models": {
@@ -475,15 +475,15 @@ const testOpenclawFixture = `{
	}
}`
func TestOpenclawEdit_RoundTrip(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit_RoundTrip(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")

	os.MkdirAll(configDir, 0o755)
	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)

	if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
		t.Fatal(err)
@@ -521,15 +521,15 @@ func TestOpenclawEdit_RoundTrip(t *testing.T) {
	}
}

func TestOpenclawEdit_Idempotent(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit_Idempotent(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")

	os.MkdirAll(configDir, 0o755)
	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)

	c.Edit([]string{"llama3.2", "mistral"})
	firstData, _ := os.ReadFile(configPath)
@@ -542,15 +542,15 @@ func TestOpenclawEdit_Idempotent(t *testing.T) {
	}
}

func TestOpenclawEdit_MultipleConsecutiveEdits(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit_MultipleConsecutiveEdits(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")

	os.MkdirAll(configDir, 0o755)
	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)

	for i := range 10 {
		models := []string{"model-a", "model-b"}
@@ -573,12 +573,12 @@ func TestOpenclawEdit_MultipleConsecutiveEdits(t *testing.T) {
	}
}

func TestOpenclawEdit_BackupCreated(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit_BackupCreated(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configPath := filepath.Join(configDir, "openclaw.json")
	configDir := filepath.Join(tmpDir, ".clawdbot")
	configPath := filepath.Join(configDir, "clawdbot.json")
	backupDir := filepath.Join(os.TempDir(), "ollama-backups")

	os.MkdirAll(configDir, 0o755)
@@ -590,7 +590,7 @@ func TestOpenclawEdit_BackupCreated(t *testing.T) {
		t.Fatal(err)
	}

	backups, _ := filepath.Glob(filepath.Join(backupDir, "openclaw.json.*"))
	backups, _ := filepath.Glob(filepath.Join(backupDir, "clawdbot.json.*"))
	foundBackup := false
	for _, backup := range backups {
		data, _ := os.ReadFile(backup)
@@ -605,151 +605,11 @@ func TestOpenclawEdit_BackupCreated(t *testing.T) {
	}
}
func TestOpenclawClawdbotAlias(t *testing.T) {
	for _, alias := range []string{"clawdbot", "moltbot"} {
		t.Run(alias+" alias resolves to Openclaw runner", func(t *testing.T) {
			r, ok := integrations[alias]
			if !ok {
				t.Fatalf("%s not found in integrations", alias)
			}
			if _, ok := r.(*Openclaw); !ok {
				t.Errorf("%s integration is %T, want *Openclaw", alias, r)
			}
		})

		t.Run(alias+" is hidden from selector", func(t *testing.T) {
			if !integrationAliases[alias] {
				t.Errorf("%s should be in integrationAliases", alias)
			}
		})
	}
}

func TestOpenclawLegacyPaths(t *testing.T) {
	c := &Openclaw{}

	t.Run("falls back to legacy clawdbot path", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)

		paths := c.Paths()
		if len(paths) != 1 {
			t.Fatalf("expected 1 path, got %d", len(paths))
		}
		if paths[0] != filepath.Join(legacyDir, "clawdbot.json") {
			t.Errorf("expected legacy path, got %s", paths[0])
		}
	})

	t.Run("prefers new path over legacy", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		newDir := filepath.Join(tmpDir, ".openclaw")
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(newDir, 0o755)
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)

		paths := c.Paths()
		if len(paths) != 1 {
			t.Fatalf("expected 1 path, got %d", len(paths))
		}
		if paths[0] != filepath.Join(newDir, "openclaw.json") {
			t.Errorf("expected new path, got %s", paths[0])
		}
	})

	t.Run("Models reads from legacy path", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
			"models":{"providers":{"ollama":{"models":[{"id":"llama3.2"}]}}}
		}`), 0o644)

		models := c.Models()
		if len(models) != 1 || models[0] != "llama3.2" {
			t.Errorf("expected [llama3.2], got %v", models)
		}
	})

	t.Run("Models prefers new path over legacy", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		newDir := filepath.Join(tmpDir, ".openclaw")
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(newDir, 0o755)
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{
			"models":{"providers":{"ollama":{"models":[{"id":"new-model"}]}}}
		}`), 0o644)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
			"models":{"providers":{"ollama":{"models":[{"id":"legacy-model"}]}}}
		}`), 0o644)

		models := c.Models()
		if len(models) != 1 || models[0] != "new-model" {
			t.Errorf("expected [new-model], got %v", models)
		}
	})

	t.Run("Edit reads new path over legacy when both exist", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		newDir := filepath.Join(tmpDir, ".openclaw")
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(newDir, 0o755)
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{"theme":"new"}`), 0o644)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"legacy"}`), 0o644)

		if err := c.Edit([]string{"llama3.2"}); err != nil {
			t.Fatal(err)
		}

		data, _ := os.ReadFile(filepath.Join(newDir, "openclaw.json"))
		var cfg map[string]any
		json.Unmarshal(data, &cfg)
		if cfg["theme"] != "new" {
			t.Errorf("expected theme from new config, got %v", cfg["theme"])
		}
	})

	t.Run("Edit migrates from legacy config", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"dark"}`), 0o644)

		if err := c.Edit([]string{"llama3.2"}); err != nil {
			t.Fatal(err)
		}

		// Should write to new path
		newPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
		data, err := os.ReadFile(newPath)
		if err != nil {
			t.Fatal("expected new config file to be created")
		}
		var cfg map[string]any
		json.Unmarshal(data, &cfg)
		if cfg["theme"] != "dark" {
			t.Error("legacy theme setting was not migrated")
		}
	})
}
func TestOpenclawEdit_CreatesDirectoryIfMissing(t *testing.T) {
	c := &Openclaw{}
func TestClawdbotEdit_CreatesDirectoryIfMissing(t *testing.T) {
	c := &Clawdbot{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	configDir := filepath.Join(tmpDir, ".openclaw")
	configDir := filepath.Join(tmpDir, ".clawdbot")

	if _, err := os.Stat(configDir); !os.IsNotExist(err) {
		t.Fatal("directory should not exist before test")
@@ -763,116 +623,3 @@ func TestOpenclawEdit_CreatesDirectoryIfMissing(t *testing.T) {
		t.Fatal("directory was not created")
	}
}

func TestOpenclawOnboarded(t *testing.T) {
	c := &Openclaw{}

	t.Run("returns false when no config exists", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		if c.onboarded() {
			t.Error("expected false when no config exists")
		}
	})

	t.Run("returns false when config exists but no wizard section", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"theme":"dark"}`), 0o644)

		if c.onboarded() {
			t.Error("expected false when no wizard section")
		}
	})

	t.Run("returns false when wizard section exists but no lastRunAt", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{}}`), 0o644)

		if c.onboarded() {
			t.Error("expected false when wizard.lastRunAt is missing")
		}
	})

	t.Run("returns false when wizard.lastRunAt is empty string", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":""}}`), 0o644)

		if c.onboarded() {
			t.Error("expected false when wizard.lastRunAt is empty")
		}
	})

	t.Run("returns true when wizard.lastRunAt is set", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)

		if !c.onboarded() {
			t.Error("expected true when wizard.lastRunAt is set")
		}
	})

	t.Run("checks legacy clawdbot path", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(legacyDir, 0o755)
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)

		if !c.onboarded() {
			t.Error("expected true when legacy config has wizard.lastRunAt")
		}
	})

	t.Run("prefers new path over legacy", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		newDir := filepath.Join(tmpDir, ".openclaw")
		legacyDir := filepath.Join(tmpDir, ".clawdbot")
		os.MkdirAll(newDir, 0o755)
		os.MkdirAll(legacyDir, 0o755)
		// New path has no wizard marker
		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
		// Legacy has wizard marker
		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)

		if c.onboarded() {
			t.Error("expected false - should prefer new path which has no wizard marker")
		}
	})

	t.Run("handles corrupted JSON gracefully", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{corrupted`), 0o644)

		if c.onboarded() {
			t.Error("expected false for corrupted JSON")
		}
	})

	t.Run("handles wrong type for wizard section", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		configDir := filepath.Join(tmpDir, ".openclaw")
		os.MkdirAll(configDir, 0o755)
		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":"not a map"}`), 0o644)

		if c.onboarded() {
			t.Error("expected false when wizard is wrong type")
		}
	})
}
@@ -41,18 +41,10 @@ type Editor interface {
// integrations is the registry of available integrations.
var integrations = map[string]Runner{
	"claude":   &Claude{},
	"clawdbot": &Openclaw{},
	"clawdbot": &Clawdbot{},
	"codex":    &Codex{},
	"moltbot":  &Openclaw{},
	"droid":    &Droid{},
	"opencode": &OpenCode{},
	"openclaw": &Openclaw{},
}

// integrationAliases are hidden from the interactive selector but work as CLI arguments.
var integrationAliases = map[string]bool{
	"clawdbot": true,
	"moltbot":  true,
}

func selectIntegration() (string, error) {

@@ -63,9 +55,6 @@ func selectIntegration() (string, error) {
	names := slices.Sorted(maps.Keys(integrations))
	var items []selectItem
	for _, name := range names {
		if integrationAliases[name] {
			continue
		}
		r := integrations[name]
		description := r.String()
		if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {

@@ -254,10 +243,10 @@ func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) erro

Supported integrations:
  claude     Claude Code
  clawdbot   Clawdbot
  codex      Codex
  droid      Droid
  opencode   OpenCode
  openclaw   OpenClaw (aliases: clawdbot, moltbot)

Examples:
  ollama launch
@@ -275,11 +275,7 @@ func parseInput(r io.Reader) (inputEvent, byte, error) {
func renderSelect(w io.Writer, prompt string, s *selectState) int {
	filtered := s.filtered()

	if s.filter == "" {
		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
	} else {
		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
	}
	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
	lineCount := 1

	if len(filtered) == 0 {

@@ -318,11 +314,7 @@ func renderSelect(w io.Writer, prompt string, s *selectState) int {
func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
	filtered := s.filtered()

	if s.filter == "" {
		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
	} else {
		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
	}
	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
	lineCount := 1

	if len(filtered) == 0 {
docs/api.md

@@ -15,6 +15,7 @@
- [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings)
- [List Running Models](#list-running-models)
- [Usage](#usage)
- [Version](#version)
- [Experimental: Image Generation](#image-generation-experimental)

@@ -1854,6 +1855,53 @@ curl http://localhost:11434/api/embeddings -d '{
}
```
## Usage

```
GET /api/usage
```

Show aggregate usage statistics per model since the server started. All timestamps are UTC in RFC 3339 format.

### Examples

#### Request

```shell
curl http://localhost:11434/api/usage
```

#### Response

```json
{
  "start": "2025-01-27T20:00:00Z",
  "usage": [
    {
      "model": "llama3.2",
      "requests": 5,
      "prompt_tokens": 130,
      "completion_tokens": 890
    },
    {
      "model": "deepseek-r1",
      "requests": 2,
      "prompt_tokens": 48,
      "completion_tokens": 312
    }
  ]
}
```

#### Response fields

- `start`: when the server started tracking usage (UTC, RFC 3339)
- `usage`: list of per-model usage statistics
  - `model`: model name
  - `requests`: total number of completed requests
  - `prompt_tokens`: total prompt tokens evaluated
  - `completion_tokens`: total completion tokens generated
|
||||
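
For programmatic access, a minimal Go sketch along these lines can fetch and decode the usage report (the struct simply mirrors the documented fields above; error handling is kept deliberately terse):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// usageResponse mirrors the /api/usage payload documented above.
type usageResponse struct {
	Start time.Time `json:"start"`
	Usage []struct {
		Model            string `json:"model"`
		Requests         int64  `json:"requests"`
		PromptTokens     int64  `json:"prompt_tokens"`
		CompletionTokens int64  `json:"completion_tokens"`
	} `json:"usage"`
}

func main() {
	resp, err := http.Get("http://localhost:11434/api/usage")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var u usageResponse
	if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
		panic(err)
	}

	fmt.Println("tracking since:", u.Start.Format(time.RFC3339))
	for _, m := range u.Usage {
		fmt.Printf("%s: %d requests, %d prompt + %d completion tokens\n",
			m.Model, m.Requests, m.PromptTokens, m.CompletionTokens)
	}
}
```
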
## Version

```
@@ -71,10 +71,6 @@
    {
      "source": "/api",
      "destination": "/api/introduction"
    },
    {
      "source": "/integrations/clawdbot",
      "destination": "/integrations/openclaw"
    }
  ],
  "navigation": {
@@ -106,8 +102,8 @@
        "group": "Integrations",
        "pages": [
          "/integrations/claude-code",
          "/integrations/clawdbot",
          "/integrations/cline",
          "/integrations/openclaw",
          "/integrations/codex",
          "/integrations/droid",
          "/integrations/goose",

@@ -10,7 +10,6 @@ Check your compute compatibility to see if your card is supported:

| Compute Capability | Family              | Cards                                                                                              |
| ------------------ | ------------------- | -------------------------------------------------------------------------------------------------- |
| 12.1               | NVIDIA              | `GB10 (DGX Spark)`                                                                                  |
| 12.0               | GeForce RTX 50xx    | `RTX 5060` `RTX 5060 Ti` `RTX 5070` `RTX 5070 Ti` `RTX 5080` `RTX 5090`                             |
|                    | NVIDIA Professional | `RTX PRO 4000 Blackwell` `RTX PRO 4500 Blackwell` `RTX PRO 5000 Blackwell` `RTX PRO 6000 Blackwell` |
| 9.0                | NVIDIA              | `H200` `H100`                                                                                       |
@@ -164,4 +163,4 @@ To select specific Vulkan GPU(s), you can set the environment variable
`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
described in the [FAQ](faq#how-do-i-configure-ollama-server). If you
encounter any problems with Vulkan based GPUs, you can disable all Vulkan GPUs
by setting `GGML_VK_VISIBLE_DEVICES=-1`
@@ -134,12 +134,22 @@ success

### Supported Quantizations

- `q4_0`
- `q4_1`
- `q5_0`
- `q5_1`
- `q8_0`

#### K-means Quantizations

- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_K_S`
- `q4_K_M`
- `q5_K_S`
- `q5_K_M`
- `q6_K`

## Sharing your model on ollama.com

@@ -1,43 +1,41 @@
---
title: OpenClaw
title: Clawdbot
---

OpenClaw is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.
Clawdbot is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.

## Install

Install [OpenClaw](https://openclaw.ai/)
Install [Clawdbot](https://clawd.bot/)

```bash
npm install -g openclaw@latest
npm install -g clawdbot@latest
```

Then run the onboarding wizard:

```bash
openclaw onboard --install-daemon
clawdbot onboard --install-daemon
```

<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
<Note>Clawdbot requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>

## Usage with Ollama

### Quick setup

```bash
ollama launch openclaw
ollama launch clawdbot
```

<Note>Previously known as Clawdbot. `ollama launch clawdbot` still works as an alias.</Note>

This configures OpenClaw to use Ollama and starts the gateway.
This configures Clawdbot to use Ollama and starts the gateway.
If the gateway is already running, no restart is needed; it automatically reloads the updated configuration.

To configure without launching:

```shell
ollama launch openclaw --config
ollama launch clawdbot --config
```

## Recommended Models
@@ -9,7 +9,7 @@ OpenCode is an open-source AI coding assistant that runs in your terminal.
Install the [OpenCode CLI](https://opencode.ai):

```bash
curl -fsSL https://opencode.ai/install | bash
curl -fsSL https://opencode.ai/install.sh | bash
```

<Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>

@@ -201,7 +201,7 @@ var (
	// Enable the new Ollama engine
	NewEngine = Bool("OLLAMA_NEW_ENGINE")
	// ContextLength sets the default context length
	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
	// Auth enables authentication between the Ollama client and server
	UseAuth = Bool("OLLAMA_AUTH")
	// Enable Vulkan backend
@@ -290,7 +290,7 @@ func AsMap() map[string]EnvVar {
		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
		"OLLAMA_REMOTES":         {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

@@ -282,7 +282,7 @@ func TestVar(t *testing.T) {

func TestContextLength(t *testing.T) {
	cases := map[string]uint{
		"":     0,
		"":     4096,
		"2048": 2048,
	}

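Read together with the default change above, this test pins down the lookup semantics: an unset or empty `OLLAMA_CONTEXT_LENGTH` falls back to 4096, while a numeric value is parsed as-is. A self-contained sketch of that behavior follows; `uintEnv` is a hypothetical stand-in, not the actual envconfig helper:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// uintEnv is a hypothetical stand-in for envconfig's Uint lookup:
// unset, empty, or unparsable values fall back to the default.
func uintEnv(key string, fallback uint) func() uint {
	return func() uint {
		if v := os.Getenv(key); v != "" {
			if n, err := strconv.ParseUint(v, 10, 64); err == nil {
				return uint(n)
			}
		}
		return fallback
	}
}

func main() {
	contextLength := uintEnv("OLLAMA_CONTEXT_LENGTH", 4096)
	fmt.Println(contextLength()) // 4096 while the variable is unset
	os.Setenv("OLLAMA_CONTEXT_LENGTH", "2048")
	fmt.Println(contextLength()) // 2048
}
```
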
@@ -80,7 +80,6 @@ type LlamaServer interface {
	GetPort() int
	GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
	HasExited() bool
	ContextLength() int
}

// llmServer is an instance of a runner hosting a single model
@@ -1201,8 +1200,7 @@ func (s *llmServer) initModel(ctx context.Context, req LoadRequest, operation Lo

	resp, err := http.DefaultClient.Do(r)
	if err != nil {
		slog.Error("do load request", "error", err)
		return nil, errors.New("model failed to load, this may be due to resource limitations or an internal error, check ollama server logs for details")
		return nil, fmt.Errorf("do load request: %w", err)
	}
	defer resp.Body.Close()

@@ -1903,10 +1901,6 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
	return 0
}

func (s *llmServer) ContextLength() int {
	return s.options.NumCtx
}

func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
	devices, err := ml.GetDevicesFromRunner(ctx, s)
	if err != nil {

@@ -1358,7 +1358,7 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) {
	// Dummy load to get the backend wired up
	f, err := os.CreateTemp("", "*.bin")
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
		return
	}
	defer f.Close()
@@ -1368,13 +1368,13 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) {
		"general.architecture": "llama",
		"tokenizer.ggml.model": "gpt2",
	}, nil); err != nil {
		http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
		return
	}

	m, err = model.New(f.Name(), ml.BackendParams{NumThreads: runtime.NumCPU(), AllocMemory: false, GPULayers: ml.GPULayersList{{}}})
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
		return
	}
	slog.Debug("dummy model load took", "duration", time.Since(startLoad))

@@ -75,12 +75,17 @@ func experimentEnabled(name string) bool {

var useClient2 = experimentEnabled("client2")

// Low VRAM mode is based on the sum of total VRAM (not free) and triggers
// reduced context length on some models
var lowVRAMThreshold uint64 = 20 * format.GibiByte

var mode string = gin.DebugMode

type Server struct {
	addr          net.Addr
	sched         *Scheduler
	defaultNumCtx int
	addr    net.Addr
	sched   *Scheduler
	lowVRAM bool
	usage   *UsageTracker
}

func init() {
@@ -103,12 +108,8 @@ var (
	errBadTemplate = errors.New("template error")
)

func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
	opts := api.DefaultOptions()
	if opts.NumCtx == 0 {
		opts.NumCtx = s.defaultNumCtx
	}

	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}
@@ -140,11 +141,20 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
	}

	opts, err := s.modelOptions(model, requestOpts)
	opts, err := modelOptions(model, requestOpts)
	if err != nil {
		return nil, nil, nil, err
	}

	// This model is much more capable with a larger context, so set that
	// unless it would penalize performance too much
	if !s.lowVRAM && slices.Contains([]string{
		"gptoss", "gpt-oss",
		"qwen3vl", "qwen3vlmoe",
	}, model.Config.ModelFamily) {
		opts.NumCtx = max(opts.NumCtx, 8192)
	}

	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
	var runner *runnerRef
	select {
@@ -264,6 +274,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
	c.Header("Content-Type", contentType)

	fn := func(resp api.GenerateResponse) error {
		if resp.Done {
			s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
		}

		resp.Model = origModel
		resp.RemoteModel = m.Config.RemoteModel
		resp.RemoteHost = m.Config.RemoteHost
@@ -570,6 +584,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
			}
			res.Context = tokens
		}

		s.usage.Record(req.Model, cr.PromptEvalCount, cr.EvalCount)
	}

	if builtinParser != nil {
@@ -1581,6 +1597,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
	r.POST("/api/copy", s.CopyHandler)

	r.GET("/api/usage", s.UsageHandler)

	// Inference
	r.GET("/api/ps", s.PsHandler)
	r.POST("/api/generate", s.GenerateHandler)
@@ -1649,7 +1667,7 @@ func Serve(ln net.Listener) error {
		}
	}

	s := &Server{addr: ln.Addr()}
	s := &Server{addr: ln.Addr(), usage: NewUsageTracker()}

	var rc *ollama.Registry
	if useClient2 {
@@ -1711,18 +1729,10 @@ func Serve(ln net.Listener) error {
	for _, gpu := range gpus {
		totalVRAM += gpu.TotalMemory - envconfig.GpuOverhead()
	}

	// Set default context based on VRAM tier
	// Use slightly lower thresholds (47/23 GiB vs. 48/24 GiB) to account for small differences in the exact value
	switch {
	case totalVRAM >= 47*format.GibiByte:
		s.defaultNumCtx = 262144
	case totalVRAM >= 23*format.GibiByte:
		s.defaultNumCtx = 32768
	default:
		s.defaultNumCtx = 4096
	if totalVRAM < lowVRAMThreshold {
		s.lowVRAM = true
		slog.Info("entering low vram mode", "total vram", format.HumanBytes2(totalVRAM), "threshold", format.HumanBytes2(lowVRAMThreshold))
	}
	slog.Info("vram-based default context", "total_vram", format.HumanBytes2(totalVRAM), "default_num_ctx", s.defaultNumCtx)

	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
@@ -1874,6 +1884,10 @@ func (s *Server) SignoutHandler(c *gin.Context) {
	c.JSON(http.StatusOK, nil)
}

func (s *Server) UsageHandler(c *gin.Context) {
	c.JSON(http.StatusOK, s.usage.Stats())
}

func (s *Server) PsHandler(c *gin.Context) {
	models := []api.ProcessModelResponse{}

@@ -1896,8 +1910,8 @@ func (s *Server) PsHandler(c *gin.Context) {
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
		if v.llama != nil {
			mr.ContextLength = v.llama.ContextLength()
		if v.Options != nil {
			mr.ContextLength = v.Options.NumCtx
		}
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
@@ -2032,6 +2046,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
	c.Header("Content-Type", contentType)

	fn := func(resp api.ChatResponse) error {
		if resp.Done {
			s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
		}

		resp.Model = origModel
		resp.RemoteModel = m.Config.RemoteModel
		resp.RemoteHost = m.Config.RemoteHost
@@ -2252,6 +2270,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
		res.DoneReason = r.DoneReason.String()
		res.TotalDuration = time.Since(checkpointStart)
		res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

		s.usage.Record(req.Model, r.PromptEvalCount, r.EvalCount)
	}

	if builtinParser != nil {

@@ -15,7 +15,6 @@ import (
)

func TestGenerateDebugRenderOnly(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -30,6 +29,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -209,7 +209,6 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
}

func TestChatDebugRenderOnly(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -224,6 +223,7 @@ func TestChatDebugRenderOnly(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),

@@ -20,7 +20,6 @@ import (
// TestGenerateWithBuiltinRenderer tests that api/generate uses built-in renderers
// when in chat-like flow (messages present, no suffix, no template)
func TestGenerateWithBuiltinRenderer(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -35,6 +34,7 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -205,7 +205,6 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {

// TestGenerateWithDebugRenderOnly tests that debug_render_only works with built-in renderers
func TestGenerateWithDebugRenderOnly(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -220,6 +219,7 @@ func TestGenerateWithDebugRenderOnly(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),

@@ -88,19 +88,39 @@ func TestGenerateChatRemote(t *testing.T) {
		if r.Method != http.MethodPost {
			t.Errorf("Expected POST request, got %s", r.Method)
		}
		if r.URL.Path != "/api/chat" {
			t.Errorf("Expected path '/api/chat', got %s", r.URL.Path)
		}

		w.WriteHeader(http.StatusOK)
		w.Header().Set("Content-Type", "application/json")
		resp := api.ChatResponse{
			Model:      "test",
			Done:       true,
			DoneReason: "load",
		}
		if err := json.NewEncoder(w).Encode(&resp); err != nil {
			t.Fatal(err)

		switch r.URL.Path {
		case "/api/chat":
			resp := api.ChatResponse{
				Model:      "test",
				Done:       true,
				DoneReason: "load",
				Metrics: api.Metrics{
					PromptEvalCount: 10,
					EvalCount:       20,
				},
			}
			if err := json.NewEncoder(w).Encode(&resp); err != nil {
				t.Fatal(err)
			}
		case "/api/generate":
			resp := api.GenerateResponse{
				Model:      "test",
				Done:       true,
				DoneReason: "stop",
				Metrics: api.Metrics{
					PromptEvalCount: 5,
					EvalCount:       15,
				},
			}
			if err := json.NewEncoder(w).Encode(&resp); err != nil {
				t.Fatal(err)
			}
		default:
			t.Errorf("unexpected path %s", r.URL.Path)
		}
	}))
	defer rs.Close()
@@ -111,7 +131,7 @@ func TestGenerateChatRemote(t *testing.T) {
	}

	t.Setenv("OLLAMA_REMOTES", p.Hostname())
	s := Server{}
	s := Server{usage: NewUsageTracker()}
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "test-cloud",
		RemoteHost: rs.URL,
@@ -159,10 +179,64 @@ func TestGenerateChatRemote(t *testing.T) {
			t.Errorf("expected done reason load, got %s", actual.DoneReason)
		}
	})

	t.Run("remote chat usage tracking", func(t *testing.T) {
		stats := s.usage.Stats()
		found := false
		for _, m := range stats.Usage {
			if m.Model == "test-cloud" {
				found = true
				if m.Requests != 1 {
					t.Errorf("expected 1 request, got %d", m.Requests)
				}
				if m.PromptTokens != 10 {
					t.Errorf("expected 10 prompt tokens, got %d", m.PromptTokens)
				}
				if m.CompletionTokens != 20 {
					t.Errorf("expected 20 completion tokens, got %d", m.CompletionTokens)
				}
			}
		}
		if !found {
			t.Error("expected usage entry for test-cloud")
		}
	})

	t.Run("remote generate usage tracking", func(t *testing.T) {
		// Reset the tracker for a clean test
		s.usage = NewUsageTracker()

		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
			Model:  "test-cloud",
			Prompt: "hello",
		})
		if w.Code != http.StatusOK {
			t.Fatalf("expected status 200, got %d", w.Code)
		}

		stats := s.usage.Stats()
		found := false
		for _, m := range stats.Usage {
			if m.Model == "test-cloud" {
				found = true
				if m.Requests != 1 {
					t.Errorf("expected 1 request, got %d", m.Requests)
				}
				if m.PromptTokens != 5 {
					t.Errorf("expected 5 prompt tokens, got %d", m.PromptTokens)
				}
				if m.CompletionTokens != 15 {
					t.Errorf("expected 15 completion tokens, got %d", m.CompletionTokens)
				}
			}
		}
		if !found {
			t.Error("expected usage entry for test-cloud")
		}
	})
}

func TestGenerateChat(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -177,6 +251,7 @@ func TestGenerateChat(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -879,7 +954,6 @@ func TestGenerateChat(t *testing.T) {
}

func TestGenerate(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	mock := mockRunner{
@@ -894,6 +968,7 @@ func TestGenerate(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1378,6 +1453,7 @@ func TestGenerateLogprobs(t *testing.T) {
	}

	s := &Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1558,6 +1634,7 @@ func TestChatLogprobs(t *testing.T) {
	}

	s := &Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1668,6 +1745,7 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
	}

	s := &Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2114,6 +2192,7 @@ func TestGenerateUnload(t *testing.T) {
	var loadFnCalled bool

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2215,6 +2294,7 @@ func TestGenerateWithImages(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2357,7 +2437,6 @@ func TestGenerateWithImages(t *testing.T) {
// TestImageGenerateStreamFalse tests that image generation respects stream=false
// and returns a single JSON response instead of streaming ndjson.
func TestImageGenerateStreamFalse(t *testing.T) {
	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
	gin.SetMode(gin.TestMode)

	p := t.TempDir()
@@ -2373,6 +2452,7 @@ func TestImageGenerateStreamFalse(t *testing.T) {

	opts := api.DefaultOptions()
	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),

@@ -255,6 +255,7 @@ func TestChatHarmonyParserStreamingRealtime(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -406,6 +407,7 @@ func TestChatHarmonyParserStreamingSimple(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),
@@ -588,6 +590,7 @@ func TestChatHarmonyParserStreaming(t *testing.T) {
	}

	s := Server{
		usage: NewUsageTracker(),
		sched: &Scheduler{
			pendingReqCh:  make(chan *LlmRequest, 1),
			finishedReqCh: make(chan *LlmRequest, 1),

@@ -1,127 +0,0 @@
package server

import (
	"testing"
)

func TestModelOptionsNumCtxPriority(t *testing.T) {
	tests := []struct {
		name           string
		envContextLen  string // empty means not set (uses 0 sentinel)
		defaultNumCtx  int    // VRAM-based default
		modelNumCtx    int    // 0 means not set in model
		requestNumCtx  int    // 0 means not set in request
		expectedNumCtx int
	}{
		{
			name:           "vram default when nothing else set",
			envContextLen:  "",
			defaultNumCtx:  32768,
			modelNumCtx:    0,
			requestNumCtx:  0,
			expectedNumCtx: 32768,
		},
		{
			name:           "env var overrides vram default",
			envContextLen:  "8192",
			defaultNumCtx:  32768,
			modelNumCtx:    0,
			requestNumCtx:  0,
			expectedNumCtx: 8192,
		},
		{
			name:           "model overrides vram default",
			envContextLen:  "",
			defaultNumCtx:  32768,
			modelNumCtx:    16384,
			requestNumCtx:  0,
			expectedNumCtx: 16384,
		},
		{
			name:           "model overrides env var",
			envContextLen:  "8192",
			defaultNumCtx:  32768,
			modelNumCtx:    16384,
			requestNumCtx:  0,
			expectedNumCtx: 16384,
		},
		{
			name:           "request overrides everything",
			envContextLen:  "8192",
			defaultNumCtx:  32768,
			modelNumCtx:    16384,
			requestNumCtx:  4096,
			expectedNumCtx: 4096,
		},
		{
			name:           "request overrides vram default",
			envContextLen:  "",
			defaultNumCtx:  32768,
			modelNumCtx:    0,
			requestNumCtx:  4096,
			expectedNumCtx: 4096,
		},
		{
			name:           "request overrides model",
			envContextLen:  "",
			defaultNumCtx:  32768,
			modelNumCtx:    16384,
			requestNumCtx:  4096,
			expectedNumCtx: 4096,
		},
		{
			name:           "low vram tier default",
			envContextLen:  "",
			defaultNumCtx:  4096,
			modelNumCtx:    0,
			requestNumCtx:  0,
			expectedNumCtx: 4096,
		},
		{
			name:           "high vram tier default",
			envContextLen:  "",
			defaultNumCtx:  262144,
			modelNumCtx:    0,
			requestNumCtx:  0,
			expectedNumCtx: 262144,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Set or clear environment variable
			if tt.envContextLen != "" {
				t.Setenv("OLLAMA_CONTEXT_LENGTH", tt.envContextLen)
			}

			// Create server with VRAM-based default
			s := &Server{
				defaultNumCtx: tt.defaultNumCtx,
			}

			// Create model options (use float64 as FromMap expects JSON-style numbers)
			var modelOpts map[string]any
			if tt.modelNumCtx != 0 {
				modelOpts = map[string]any{"num_ctx": float64(tt.modelNumCtx)}
			}
			model := &Model{
				Options: modelOpts,
			}

			// Create request options (use float64 as FromMap expects JSON-style numbers)
			var requestOpts map[string]any
			if tt.requestNumCtx != 0 {
				requestOpts = map[string]any{"num_ctx": float64(tt.requestNumCtx)}
			}

			opts, err := s.modelOptions(model, requestOpts)
			if err != nil {
				t.Fatalf("modelOptions failed: %v", err)
			}

			if opts.NumCtx != tt.expectedNumCtx {
				t.Errorf("NumCtx = %d, want %d", opts.NumCtx, tt.expectedNumCtx)
			}
		})
	}
}
@@ -804,7 +804,6 @@ func (s *mockLlm) GetPort() int { return -
func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
func (s *mockLlm) HasExited() bool { return false }
func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
func (s *mockLlm) ContextLength() int { return 0 }

// TestImageGenRunnerCanBeEvicted verifies that an image generation model
// loaded in the scheduler can be evicted when idle.

62
server/usage.go
Normal file
@@ -0,0 +1,62 @@
package server

import (
	"sync"
	"time"

	"github.com/ollama/ollama/api"
)

type ModelUsage struct {
	Requests         int64
	PromptTokens     int64
	CompletionTokens int64
}

type UsageTracker struct {
	mu     sync.Mutex
	start  time.Time
	models map[string]*ModelUsage
}

func NewUsageTracker() *UsageTracker {
	return &UsageTracker{
		start:  time.Now().UTC(),
		models: make(map[string]*ModelUsage),
	}
}

func (u *UsageTracker) Record(model string, promptTokens, completionTokens int) {
	u.mu.Lock()
	defer u.mu.Unlock()

	m, ok := u.models[model]
	if !ok {
		m = &ModelUsage{}
		u.models[model] = m
	}

	m.Requests++
	m.PromptTokens += int64(promptTokens)
	m.CompletionTokens += int64(completionTokens)
}

func (u *UsageTracker) Stats() api.UsageResponse {
	u.mu.Lock()
	defer u.mu.Unlock()

	byModel := make([]api.ModelUsageData, 0, len(u.models))
	for model, usage := range u.models {
		byModel = append(byModel, api.ModelUsageData{
			Model:            model,
			Requests:         usage.Requests,
			PromptTokens:     usage.PromptTokens,
			CompletionTokens: usage.CompletionTokens,
		})
	}

	return api.UsageResponse{
		Start: u.start,
		Usage: byModel,
	}
}
136
server/usage_test.go
Normal file
@@ -0,0 +1,136 @@
package server

import (
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"sync"
	"testing"

	"github.com/gin-gonic/gin"
	"github.com/ollama/ollama/api"
)

func TestUsageTrackerRecord(t *testing.T) {
	tracker := NewUsageTracker()

	tracker.Record("model-a", 10, 20)
	tracker.Record("model-a", 5, 15)
	tracker.Record("model-b", 100, 200)

	stats := tracker.Stats()

	if len(stats.Usage) != 2 {
		t.Fatalf("expected 2 models, got %d", len(stats.Usage))
	}

	lookup := make(map[string]api.ModelUsageData)
	for _, m := range stats.Usage {
		lookup[m.Model] = m
	}

	a := lookup["model-a"]
	if a.Requests != 2 {
		t.Errorf("model-a requests: expected 2, got %d", a.Requests)
	}
	if a.PromptTokens != 15 {
		t.Errorf("model-a prompt tokens: expected 15, got %d", a.PromptTokens)
	}
	if a.CompletionTokens != 35 {
		t.Errorf("model-a completion tokens: expected 35, got %d", a.CompletionTokens)
	}

	b := lookup["model-b"]
	if b.Requests != 1 {
		t.Errorf("model-b requests: expected 1, got %d", b.Requests)
	}
	if b.PromptTokens != 100 {
		t.Errorf("model-b prompt tokens: expected 100, got %d", b.PromptTokens)
	}
	if b.CompletionTokens != 200 {
		t.Errorf("model-b completion tokens: expected 200, got %d", b.CompletionTokens)
	}
}

func TestUsageTrackerConcurrent(t *testing.T) {
	tracker := NewUsageTracker()

	var wg sync.WaitGroup
	for range 100 {
		wg.Add(1)
		go func() {
			defer wg.Done()
			tracker.Record("model-a", 1, 2)
		}()
	}
	wg.Wait()

	stats := tracker.Stats()
	if len(stats.Usage) != 1 {
		t.Fatalf("expected 1 model, got %d", len(stats.Usage))
	}

	m := stats.Usage[0]
	if m.Requests != 100 {
		t.Errorf("requests: expected 100, got %d", m.Requests)
	}
	if m.PromptTokens != 100 {
		t.Errorf("prompt tokens: expected 100, got %d", m.PromptTokens)
	}
	if m.CompletionTokens != 200 {
		t.Errorf("completion tokens: expected 200, got %d", m.CompletionTokens)
	}
}

func TestUsageTrackerStart(t *testing.T) {
	tracker := NewUsageTracker()

	stats := tracker.Stats()
	if stats.Start.IsZero() {
		t.Error("expected non-zero start time")
	}
}

func TestUsageHandler(t *testing.T) {
	gin.SetMode(gin.TestMode)

	s := &Server{
		usage: NewUsageTracker(),
	}

	s.usage.Record("llama3", 50, 100)
	s.usage.Record("llama3", 25, 50)

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodGet, "/api/usage", nil)

	s.UsageHandler(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}

	var resp api.UsageResponse
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to unmarshal response: %v", err)
	}

	if len(resp.Usage) != 1 {
		t.Fatalf("expected 1 model, got %d", len(resp.Usage))
	}

	m := resp.Usage[0]
	if m.Model != "llama3" {
		t.Errorf("expected model llama3, got %s", m.Model)
	}
	if m.Requests != 2 {
		t.Errorf("expected 2 requests, got %d", m.Requests)
	}
	if m.PromptTokens != 75 {
		t.Errorf("expected 75 prompt tokens, got %d", m.PromptTokens)
	}
	if m.CompletionTokens != 150 {
		t.Errorf("expected 150 completion tokens, got %d", m.CompletionTokens)
	}
}
@@ -347,11 +347,6 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
	return s.vramSize
}

// Context length is not applicable for image generation.
func (s *Server) ContextLength() int {
	return 0
}

func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
	return nil, 0, errors.New("not supported")
}