Compare commits


1 commit

Author: jmorganca
SHA1: 846f3fbcc8

app: expose server's default context length to UI
Parse the default_num_ctx from the server's "vram-based default context"
log line and expose it through the inference compute API. This eliminates
duplicate VRAM tier calculation logic in the frontend.

- Add InferenceInfo struct with Computes and DefaultContextLength
- Rename GetInferenceComputer to GetInferenceInfo
- Handle missing default context line gracefully (older servers)
- Add DefaultContextLength to InferenceComputeResponse
- Update Settings UI to use server's default, disable slider while loading
- Add disabled prop to Slider component (grays out + hides handle)
- Migrate existing users with context_length=4096 to 0 (auto mode)
Date: 2026-02-02 16:25:29 -08:00
11 changed files with 133 additions and 55 deletions
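To see the new parsing in isolation, here is a minimal, self-contained sketch of the log-line extraction this commit adds (the sample line and both regexes are copied from the diff below; the real implementation also scans for the inference-compute lines and waits on a context deadline):

package main

import (
    "fmt"
    "regexp"
    "strconv"
)

func main() {
    // Sample server log line, copied from the "metal" test fixture in this diff.
    line := `time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144`

    // Same two-step match the patch uses: a cheap marker regex to find the
    // line, then a capture group for the default_num_ctx value.
    defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
    defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)

    if defaultCtxMarker.MatchString(line) {
        if m := defaultCtxRegex.FindStringSubmatch(line); len(m) > 1 {
            if n, err := strconv.Atoi(m[1]); err == nil {
                fmt.Println("default context length:", n) // prints 262144
            }
        }
    }
}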

View File

@@ -41,6 +41,11 @@ type InferenceCompute struct {
     VRAM     string
 }
 
+type InferenceInfo struct {
+    Computes             []InferenceCompute
+    DefaultContextLength int
+}
+
 func New(s *store.Store, devMode bool) *Server {
     p := resolvePath("ollama")
     return &Server{store: s, bin: p, dev: devMode}
@@ -262,9 +267,12 @@ func openRotatingLog() (io.WriteCloser, error) {
 
 // Attempt to retrieve inference compute information from the server
 // log. Set ctx to timeout to control how long to wait for the logs to appear
-func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
-    inference := []InferenceCompute{}
-    marker := regexp.MustCompile(`inference compute.*library=`)
+func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
+    info := &InferenceInfo{}
+    computeMarker := regexp.MustCompile(`inference compute.*library=`)
+    defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
+    defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)
     q := `inference compute.*%s=["]([^"]*)["]`
     nq := `inference compute.*%s=(\S+)\s`
     type regex struct {
@@ -330,8 +338,8 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
     scanner := bufio.NewScanner(file)
     for scanner.Scan() {
         line := scanner.Text()
-        match := marker.FindStringSubmatch(line)
-        if len(match) > 0 {
+        // Check for inference compute lines
+        if computeMarker.MatchString(line) {
             ic := InferenceCompute{
                 Library: get("library", line),
                 Variant: get("variant", line),
@@ -342,12 +350,25 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
             }
             slog.Info("Matched", "inference compute", ic)
-            inference = append(inference, ic)
-        } else {
-            // Break out on first non matching line after we start matching
-            if len(inference) > 0 {
-                return inference, nil
-            }
-        }
+            info.Computes = append(info.Computes, ic)
+            continue
+        }
+        // Check for default context length line
+        if defaultCtxMarker.MatchString(line) {
+            match := defaultCtxRegex.FindStringSubmatch(line)
+            if len(match) > 1 {
+                numCtx, err := strconv.Atoi(match[1])
+                if err == nil {
+                    info.DefaultContextLength = numCtx
+                    slog.Info("Matched default context length", "default_num_ctx", numCtx)
+                }
+            }
+            return info, nil
+        }
+        // If we've found compute info but hit a non-matching line, return what we have
+        // This handles older server versions that don't log the default context line
+        if len(info.Computes) > 0 {
+            return info, nil
+        }
     }
     time.Sleep(100 * time.Millisecond)
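For orientation, a sketch of how a caller consumes the renamed API — it mirrors the handler change in the last file of this diff. This assumes it runs inside a function in this package with context, fmt, and time imported; the timeout value is illustrative:

    ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
    defer cancel()

    // Blocks until the log lines appear or ctx expires.
    info, err := GetInferenceInfo(ctx)
    if err != nil {
        return err // timed out waiting for the server log
    }
    for _, ic := range info.Computes {
        fmt.Println("compute:", ic.Library, ic.VRAM)
    }
    // DefaultContextLength stays 0 for older servers that never log
    // the "vram-based default context" line.
    fmt.Println("default context length:", info.DefaultContextLength)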

View File

@@ -136,44 +136,50 @@ func TestServerCmd(t *testing.T) {
     }
 }
 
-func TestGetInferenceComputer(t *testing.T) {
+func TestGetInferenceInfo(t *testing.T) {
     tests := []struct {
-        name string
-        log  string
-        exp  []InferenceCompute
+        name             string
+        log              string
+        expComputes      []InferenceCompute
+        expDefaultCtxLen int
     }{
         {
             name: "metal",
             log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
 time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
+time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144
 time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
 `,
-            exp: []InferenceCompute{{
+            expComputes: []InferenceCompute{{
                 Library: "metal",
                 Driver:  "0.0",
                 VRAM:    "96.0 GiB",
             }},
+            expDefaultCtxLen: 262144,
         },
         {
             name: "cpu",
             log: `time=2025-07-01T17:59:51.470Z level=INFO source=gpu.go:377 msg="no compatible GPUs were discovered"
 time=2025-07-01T17:59:51.470Z level=INFO source=types.go:130 msg="inference compute" id=0 library=cpu variant="" compute="" driver=0.0 name="" total="31.3 GiB" available="30.4 GiB"
+time=2025-07-01T17:59:51.471Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="31.3 GiB" default_num_ctx=32768
 [GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
 `,
-            exp: []InferenceCompute{{
+            expComputes: []InferenceCompute{{
                 Library: "cpu",
                 Driver:  "0.0",
                 VRAM:    "31.3 GiB",
             }},
+            expDefaultCtxLen: 32768,
         },
         {
             name: "cuda1",
             log: `time=2025-07-01T19:33:43.162Z level=DEBUG source=amd_linux.go:419 msg="amdgpu driver not detected /sys/module/amdgpu"
 releasing cuda driver library
 time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference compute" id=GPU-452cac9f-6960-839c-4fb3-0cec83699196 library=cuda variant=v12 compute=6.1 driver=12.7 name="NVIDIA GeForce GT 1030" total="3.9 GiB" available="3.9 GiB"
+time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="3.9 GiB" default_num_ctx=4096
 [GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
 `,
-            exp: []InferenceCompute{{
+            expComputes: []InferenceCompute{{
                 Library: "cuda",
                 Variant: "v12",
                 Compute: "6.1",
@@ -181,6 +187,7 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
                 Name:    "NVIDIA GeForce GT 1030",
                 VRAM:    "3.9 GiB",
             }},
+            expDefaultCtxLen: 4096,
         },
         {
             name: "frank",
@@ -188,9 +195,10 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 releasing cuda driver library
 time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-d6de3398-9932-6902-11ec-fee8e424c8a2 library=cuda variant=v12 compute=7.5 driver=12.8 name="NVIDIA GeForce RTX 2080 Ti" total="10.6 GiB" available="10.4 GiB"
 time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-9abb57639fa80c50 library=rocm variant="" compute=gfx1030 driver=6.3 name=1002:73bf total="16.0 GiB" available="1.3 GiB"
+time=2025-07-01T19:36:13.316Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="26.6 GiB" default_num_ctx=32768
 [GIN] 2025/07/01 - 18:00:09 | 200 | 50.263µs | 100.126.204.152 | HEAD "/"
 `,
-            exp: []InferenceCompute{
+            expComputes: []InferenceCompute{
                 {
                     Library: "cuda",
                     Variant: "v12",
@@ -207,6 +215,20 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
                     VRAM:    "16.0 GiB",
                 },
             },
+            expDefaultCtxLen: 32768,
         },
+        {
+            name: "missing_default_context",
+            log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
+time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
+time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
+`,
+            expComputes: []InferenceCompute{{
+                Library: "metal",
+                Driver:  "0.0",
+                VRAM:    "96.0 GiB",
+            }},
+            expDefaultCtxLen: 0, // No default context line, should return 0
+        },
     }
     for _, tt := range tests {
@@ -219,18 +241,21 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
             }
             ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
             defer cancel()
-            ics, err := GetInferenceComputer(ctx)
+            info, err := GetInferenceInfo(ctx)
             if err != nil {
-                t.Fatalf(" failed to get inference compute: %v", err)
+                t.Fatalf("failed to get inference info: %v", err)
             }
-            if !reflect.DeepEqual(ics, tt.exp) {
-                t.Fatalf("got:\n%#v\nwant:\n%#v", ics, tt.exp)
+            if !reflect.DeepEqual(info.Computes, tt.expComputes) {
+                t.Fatalf("computes mismatch\ngot:\n%#v\nwant:\n%#v", info.Computes, tt.expComputes)
             }
+            if info.DefaultContextLength != tt.expDefaultCtxLen {
+                t.Fatalf("default context length mismatch: got %d, want %d", info.DefaultContextLength, tt.expDefaultCtxLen)
+            }
         })
     }
 }
 
-func TestGetInferenceComputerTimeout(t *testing.T) {
+func TestGetInferenceInfoTimeout(t *testing.T) {
     ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
     defer cancel()
     tmpDir := t.TempDir()
@@ -239,7 +264,7 @@ func TestGetInferenceComputerTimeout(t *testing.T) {
     if err != nil {
         t.Fatalf("failed to write log file %s: %s", serverLogPath, err)
     }
-    _, err = GetInferenceComputer(ctx)
+    _, err = GetInferenceInfo(ctx)
     if err == nil {
         t.Fatal("expected timeout")
     }

View File

@@ -14,7 +14,7 @@ import (
 
 // currentSchemaVersion defines the current database schema version.
 // Increment this when making schema changes that require migrations.
-const currentSchemaVersion = 12
+const currentSchemaVersion = 13
 
 // database wraps the SQLite connection.
 // SQLite handles its own locking for concurrent access:
@@ -73,7 +73,7 @@ func (db *database) init() error {
         agent BOOLEAN NOT NULL DEFAULT 0,
         tools BOOLEAN NOT NULL DEFAULT 0,
         working_dir TEXT NOT NULL DEFAULT '',
-        context_length INTEGER NOT NULL DEFAULT 4096,
+        context_length INTEGER NOT NULL DEFAULT 0,
         window_width INTEGER NOT NULL DEFAULT 0,
         window_height INTEGER NOT NULL DEFAULT 0,
         config_migrated BOOLEAN NOT NULL DEFAULT 0,
@@ -244,6 +244,12 @@ func (db *database) migrate() error {
             return fmt.Errorf("migrate v11 to v12: %w", err)
         }
         version = 12
+    case 12:
+        // change default context_length from 4096 to 0 (VRAM-based tiered defaults)
+        if err := db.migrateV12ToV13(); err != nil {
+            return fmt.Errorf("migrate v12 to v13: %w", err)
+        }
+        version = 13
     default:
         // If we have a version we don't recognize, just set it to current
         // This might happen during development
@@ -452,6 +458,23 @@ func (db *database) migrateV11ToV12() error {
     return nil
 }
 
+// migrateV12ToV13 changes the default context_length from 4096 to 0
+// When context_length is 0, the ollama server uses VRAM-based tiered defaults
+func (db *database) migrateV12ToV13() error {
+    // Update users who have the old default of 4096 to the new default of 0
+    _, err := db.conn.Exec(`UPDATE settings SET context_length = 0 WHERE context_length = 4096`)
+    if err != nil {
+        return fmt.Errorf("update context_length default: %w", err)
+    }
+
+    _, err = db.conn.Exec(`UPDATE settings SET schema_version = 13`)
+    if err != nil {
+        return fmt.Errorf("update schema version: %w", err)
+    }
+
+    return nil
+}
+
 // cleanupOrphanedData removes orphaned records that may exist due to the foreign key bug
 func (db *database) cleanupOrphanedData() error {
     _, err := db.conn.Exec(`
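To make the migration's effect concrete, a worked example in SQL (hypothetical rows; only the old default is rewritten, so explicit user choices survive):

-- Before (schema v12):              After (schema v13):
--   context_length = 4096   ->        context_length = 0     (old default: now auto)
--   context_length = 8192   ->        context_length = 8192  (user-chosen: kept)
UPDATE settings SET context_length = 0 WHERE context_length = 4096;
UPDATE settings SET schema_version = 13;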

View File

@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS settings (
     agent BOOLEAN NOT NULL DEFAULT 0,
     tools BOOLEAN NOT NULL DEFAULT 0,
     working_dir TEXT NOT NULL DEFAULT '',
-    context_length INTEGER NOT NULL DEFAULT 4096,
+    context_length INTEGER NOT NULL DEFAULT 0,
     window_width INTEGER NOT NULL DEFAULT 0,
    window_height INTEGER NOT NULL DEFAULT 0,
     config_migrated BOOLEAN NOT NULL DEFAULT 0,

View File

@@ -289,10 +289,12 @@ export class InferenceCompute {
 }
 
 export class InferenceComputeResponse {
   inferenceComputes: InferenceCompute[];
+  defaultContextLength: number;
 
   constructor(source: any = {}) {
     if ('string' === typeof source) source = JSON.parse(source);
     this.inferenceComputes = this.convertValues(source["inferenceComputes"], InferenceCompute);
+    this.defaultContextLength = source["defaultContextLength"];
   }
 
   convertValues(a: any, classs: any, asMap: boolean = false): any {

View File

@@ -4,7 +4,6 @@ import {
ChatEvent,
DownloadEvent,
ErrorEvent,
InferenceCompute,
InferenceComputeResponse,
ModelCapabilitiesResponse,
Model,
@@ -379,7 +378,7 @@ export async function* pullModel(
   }
 }
 
-export async function getInferenceCompute(): Promise<InferenceCompute[]> {
+export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
   const response = await fetch(`${API_BASE}/api/v1/inference-compute`);
   if (!response.ok) {
     throw new Error(
@@ -388,8 +387,7 @@ export async function getInferenceCompute(): Promise<InferenceCompute[]> {
   }
 
   const data = await response.json();
-  const inferenceComputeResponse = new InferenceComputeResponse(data);
-  return inferenceComputeResponse.inferenceComputes || [];
+  return new InferenceComputeResponse(data);
 }
 
 export async function fetchHealth(): Promise<boolean> {
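A hypothetical call site for the changed return type (not part of this diff, assumed to run inside an async function) — callers now read fields off the response object instead of receiving a bare array:

  // The response now carries both the compute list and the server's
  // default context length.
  const res = await getInferenceCompute();
  const computes = res.inferenceComputes || [];
  const defaultCtx = res.defaultContextLength || 0; // 0 = unknown / older server
  console.log(`computes: ${computes.length}, default ctx: ${defaultCtx}`);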

View File

@@ -19,7 +19,7 @@ import { Settings as SettingsType } from "@/gotypes";
 import { useNavigate } from "@tanstack/react-router";
 import { useUser } from "@/hooks/useUser";
 import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
-import { getSettings, updateSettings } from "@/api";
+import { getSettings, updateSettings, getInferenceCompute } from "@/api";
 
 function AnimatedDots() {
   return (
@@ -65,6 +65,13 @@ export default function Settings() {
   const settings = settingsData?.settings || null;
 
+  const { data: inferenceComputeResponse } = useQuery({
+    queryKey: ["inferenceCompute"],
+    queryFn: getInferenceCompute,
+  });
+
+  const defaultContextLength = inferenceComputeResponse?.defaultContextLength;
+
   const updateSettingsMutation = useMutation({
     mutationFn: updateSettings,
     onSuccess: () => {
@@ -148,7 +155,7 @@ export default function Settings() {
       Models: "",
       Agent: false,
       Tools: false,
-      ContextLength: 4096,
+      ContextLength: 0,
       AirplaneMode: false,
     });
     updateSettingsMutation.mutate(defaultSettings);
@@ -419,13 +426,11 @@ export default function Settings() {
                 </Description>
                 <div className="mt-3">
                   <Slider
-                    value={(() => {
-                      // Otherwise use the settings value
-                      return settings.ContextLength || 4096;
-                    })()}
+                    value={settings.ContextLength || defaultContextLength || 0}
                     onChange={(value) => {
                       handleChange("ContextLength", value);
                     }}
+                    disabled={!defaultContextLength}
                     options={[
                       { value: 4096, label: "4k" },
                       { value: 8192, label: "8k" },
@@ -440,6 +445,7 @@ export default function Settings() {
               </div>
             </div>
           </Field>
+          {/* Airplane Mode */}
           <Field>
             <div className="flex items-start justify-between gap-4">

View File

@@ -6,10 +6,11 @@ export interface SliderProps {
   value?: number;
   onChange?: (value: number) => void;
   className?: string;
+  disabled?: boolean;
 }
 
 const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
-  ({ label, options, value = 0, onChange }, ref) => {
+  ({ label, options, value = 0, onChange, disabled = false }, ref) => {
     const [selectedValue, setSelectedValue] = React.useState(value);
     const [isDragging, setIsDragging] = React.useState(false);
     const containerRef = React.useRef<HTMLDivElement>(null);
@@ -20,6 +21,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
     }, [value]);
 
     const handleClick = (optionValue: number) => {
+      if (disabled) return;
       setSelectedValue(optionValue);
       onChange?.(optionValue);
     };
@@ -39,6 +41,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
     };
 
     const handleMouseDown = (e: React.MouseEvent) => {
+      if (disabled) return;
       setIsDragging(true);
       e.preventDefault();
     };
@@ -77,7 +80,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
     }
 
     return (
-      <div className="space-y-2" ref={ref}>
+      <div className={`space-y-2 ${disabled ? "opacity-50" : ""}`} ref={ref}>
        {label && <label className="text-sm font-medium">{label}</label>}
         <div className="relative">
           <div className="absolute top-[9px] left-2 right-2 h-1 bg-neutral-200 dark:bg-neutral-700 pointer-events-none rounded-full" />
@@ -88,10 +91,11 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
               <button
                 onClick={() => handleClick(option.value)}
                 onMouseDown={handleMouseDown}
-                className="relative px-3 py-6 -mx-3 -my-6 z-10 cursor-pointer"
+                disabled={disabled}
+                className={`relative px-3 py-6 -mx-3 -my-6 z-10 ${disabled ? "cursor-not-allowed" : "cursor-pointer"}`}
               >
                 <div className="relative w-5 h-5 flex items-center justify-center">
-                  {selectedValue === option.value && (
+                  {selectedValue === option.value && !disabled && (
                     <div className="w-4 h-4 bg-white dark:bg-white border border-neutral-400 dark:border-neutral-500 rounded-full cursor-grab active:cursor-grabbing" />
                   )}
                 </div>

View File

@@ -26,12 +26,14 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
     currentChatId && currentChatId !== "new" ? currentChatId : "",
   );
 
-  const { data: inferenceComputes = [] } = useQuery({
-    queryKey: ["inference-compute"],
+  const { data: inferenceComputeResponse } = useQuery({
+    queryKey: ["inferenceCompute"],
     queryFn: getInferenceCompute,
     enabled: !settings.selectedModel, // Only fetch if no model is selected
   });
 
+  const inferenceComputes = inferenceComputeResponse?.inferenceComputes || [];
+
   const totalVRAM = useMemo(
     () => getTotalVRAM(inferenceComputes),
     [inferenceComputes],

View File

@@ -45,7 +45,8 @@ type InferenceCompute struct {
 }
 
 type InferenceComputeResponse struct {
-    InferenceComputes []InferenceCompute `json:"inferenceComputes"`
+    InferenceComputes    []InferenceCompute `json:"inferenceComputes"`
+    DefaultContextLength int                `json:"defaultContextLength"`
 }
 
 type ModelCapabilitiesResponse struct {

View File

@@ -1417,11 +1417,6 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) error {
         settings.Models = envconfig.Models()
     }
 
-    // set default context length if not set
-    if settings.ContextLength == 0 {
-        settings.ContextLength = 4096
-    }
-
     // Include current runtime settings
     settings.Agent = s.Agent
     settings.Tools = s.Tools
@@ -1463,14 +1458,14 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
 func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
     ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
     defer cancel()
-    serverInferenceComputes, err := server.GetInferenceComputer(ctx)
+    info, err := server.GetInferenceInfo(ctx)
     if err != nil {
-        s.log().Error("failed to get inference compute", "error", err)
-        return fmt.Errorf("failed to get inference compute: %w", err)
+        s.log().Error("failed to get inference info", "error", err)
+        return fmt.Errorf("failed to get inference info: %w", err)
     }
-    inferenceComputes := make([]responses.InferenceCompute, len(serverInferenceComputes))
-    for i, ic := range serverInferenceComputes {
+    inferenceComputes := make([]responses.InferenceCompute, len(info.Computes))
+    for i, ic := range info.Computes {
         inferenceComputes[i] = responses.InferenceCompute{
             Library: ic.Library,
             Variant: ic.Variant,
@@ -1482,7 +1477,8 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
     }
 
     response := responses.InferenceComputeResponse{
-        InferenceComputes: inferenceComputes,
+        InferenceComputes:    inferenceComputes,
+        DefaultContextLength: info.DefaultContextLength,
     }
 
     w.Header().Set("Content-Type", "application/json")