mirror of https://github.com/ollama/ollama.git
synced 2026-01-18 20:39:13 -05:00

Compare commits: parth/decr...mxyng/envi
1 commit
| Author | SHA1 | Date |
|---|---|---|
|  | 8135bff40d |  |
190  cmd/cmd.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"crypto/ed25519"
 	"crypto/rand"
+	_ "embed"
 	"encoding/json"
 	"encoding/pem"
 	"errors"
@@ -47,6 +48,9 @@ import (
 	"github.com/ollama/ollama/version"
 )
 
+//go:embed usage.gotmpl
+var usageTemplate string
+
 const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
 
 // ensureThinkingSupport emits a warning if the model does not advertise thinking support
@@ -1664,21 +1668,6 @@ func versionHandler(cmd *cobra.Command, _ []string) {
 	}
 }
 
-func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
-	if len(envs) == 0 {
-		return
-	}
-
-	envUsage := `
-Environment Variables:
-`
-	for _, e := range envs {
-		envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
-	}
-
-	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
-}
-
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false
@@ -1708,22 +1697,24 @@ func NewCLI() *cobra.Command {
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 
 	createCmd := &cobra.Command{
-		Use:     "create MODEL",
-		Short:   "Create a model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CreateHandler,
+		Use:         "create MODEL",
+		Short:       "Create a model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CreateHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
 
 	showCmd := &cobra.Command{
-		Use:     "show MODEL",
-		Short:   "Show information for a model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    ShowHandler,
+		Use:         "show MODEL",
+		Short:       "Show information for a model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ShowHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	showCmd.Flags().Bool("license", false, "Show license of a model")
@@ -1734,11 +1725,12 @@ func NewCLI() *cobra.Command {
 	showCmd.Flags().BoolP("verbose", "v", false, "Show detailed model information")
 
 	runCmd := &cobra.Command{
-		Use:     "run MODEL [PROMPT]",
-		Short:   "Run a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    RunHandler,
+		Use:         "run MODEL [PROMPT]",
+		Short:       "Run a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        RunHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST", "OLLAMA_NOHISTORY"),
 	}
 
 	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
@@ -1753,11 +1745,12 @@ func NewCLI() *cobra.Command {
 	runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
 
 	stopCmd := &cobra.Command{
-		Use:     "stop MODEL",
-		Short:   "Stop a running model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    StopHandler,
+		Use:         "stop MODEL",
+		Short:       "Stop a running model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        StopHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	serveCmd := &cobra.Command{
@@ -1766,24 +1759,44 @@ func NewCLI() *cobra.Command {
 		Short: "Start ollama",
 		Args:  cobra.ExactArgs(0),
 		RunE:  RunServer,
+		Annotations: envconfig.Usage(
+			"OLLAMA_DEBUG",
+			"OLLAMA_HOST",
+			"OLLAMA_CONTEXT_LENGTH",
+			"OLLAMA_KEEP_ALIVE",
+			"OLLAMA_MAX_LOADED_MODELS",
+			"OLLAMA_MAX_QUEUE",
+			"OLLAMA_MODELS",
+			"OLLAMA_NUM_PARALLEL",
+			"OLLAMA_NOPRUNE",
+			"OLLAMA_ORIGINS",
+			"OLLAMA_SCHED_SPREAD",
+			"OLLAMA_FLASH_ATTENTION",
+			"OLLAMA_KV_CACHE_TYPE",
+			"OLLAMA_LLM_LIBRARY",
+			"OLLAMA_GPU_OVERHEAD",
+			"OLLAMA_LOAD_TIMEOUT",
+		),
 	}
 
 	pullCmd := &cobra.Command{
-		Use:     "pull MODEL",
-		Short:   "Pull a model from a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PullHandler,
+		Use:         "pull MODEL",
+		Short:       "Pull a model from a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PullHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	pushCmd := &cobra.Command{
-		Use:     "push MODEL",
-		Short:   "Push a model to a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PushHandler,
+		Use:         "push MODEL",
+		Short:       "Push a model to a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PushHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
@@ -1805,33 +1818,37 @@ func NewCLI() *cobra.Command {
 	}
 
 	listCmd := &cobra.Command{
-		Use:     "list",
-		Aliases: []string{"ls"},
-		Short:   "List models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListHandler,
+		Use:         "list",
+		Aliases:     []string{"ls"},
+		Short:       "List models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	psCmd := &cobra.Command{
-		Use:     "ps",
-		Short:   "List running models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListRunningHandler,
+		Use:         "ps",
+		Short:       "List running models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListRunningHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 	copyCmd := &cobra.Command{
-		Use:     "cp SOURCE DESTINATION",
-		Short:   "Copy a model",
-		Args:    cobra.ExactArgs(2),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CopyHandler,
+		Use:         "cp SOURCE DESTINATION",
+		Short:       "Copy a model",
+		Args:        cobra.ExactArgs(2),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CopyHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	deleteCmd := &cobra.Command{
-		Use:     "rm MODEL [MODEL...]",
-		Short:   "Remove a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    DeleteHandler,
+		Use:         "rm MODEL [MODEL...]",
+		Short:       "Remove a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        DeleteHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	runnerCmd := &cobra.Command{
@@ -1846,50 +1863,6 @@ func NewCLI() *cobra.Command {
 		_ = runner.Execute(args[1:])
 	})
 
-	envVars := envconfig.AsMap()
-
-	envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
-
-	for _, cmd := range []*cobra.Command{
-		createCmd,
-		showCmd,
-		runCmd,
-		stopCmd,
-		pullCmd,
-		pushCmd,
-		listCmd,
-		psCmd,
-		copyCmd,
-		deleteCmd,
-		serveCmd,
-	} {
-		switch cmd {
-		case runCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
-		case serveCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{
-				envVars["OLLAMA_DEBUG"],
-				envVars["OLLAMA_HOST"],
-				envVars["OLLAMA_CONTEXT_LENGTH"],
-				envVars["OLLAMA_KEEP_ALIVE"],
-				envVars["OLLAMA_MAX_LOADED_MODELS"],
-				envVars["OLLAMA_MAX_QUEUE"],
-				envVars["OLLAMA_MODELS"],
-				envVars["OLLAMA_NUM_PARALLEL"],
-				envVars["OLLAMA_NOPRUNE"],
-				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
-				envVars["OLLAMA_FLASH_ATTENTION"],
-				envVars["OLLAMA_KV_CACHE_TYPE"],
-				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
-			})
-		default:
-			appendEnvDocs(cmd, envs)
-		}
-	}
-
 	rootCmd.AddCommand(
 		serveCmd,
 		createCmd,
@@ -1907,6 +1880,7 @@ func NewCLI() *cobra.Command {
 		runnerCmd,
 	)
 
+	rootCmd.SetUsageTemplate(usageTemplate)
	return rootCmd
 }
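Taken together, the cmd/cmd.go changes swap string concatenation (appendEnvDocs) for data: each command now carries its environment-variable docs in cobra's Annotations map, and the embedded usage.gotmpl (the new file below) renders them. A minimal standalone sketch of the same pattern, with a hypothetical command and variable (DEMO_HOST is invented; ollama fills this map via envconfig.Usage):

// sketch: a cobra command carrying env-var docs in Annotations, rendered by
// an Annotations-aware usage template (not ollama code).
package main

import "github.com/spf13/cobra"

func main() {
	cmd := &cobra.Command{
		Use:   "demo",
		Short: "Show Annotations-driven usage text",
		// Hypothetical key and description; ollama populates this with
		// envconfig.Usage("OLLAMA_HOST", ...).
		Annotations: map[string]string{
			"DEMO_HOST": "Address for the demo server (default: 127.0.0.1:8080)",
		},
		Run: func(*cobra.Command, []string) {},
	}
	// Append an Annotations section to the default template, the same move
	// usage.gotmpl makes wholesale (rpad is one of cobra's template functions).
	cmd.SetUsageTemplate(cmd.UsageTemplate() + `
Environment Variables:
{{- range $key, $value := .Annotations }}
  {{ rpad $key 24 }} {{ $value }}
{{- end }}
`)
	_ = cmd.Usage() // prints usage text including the env section
}

Because the data rides on the command itself, the per-command switch that previously dispatched appendEnvDocs calls becomes unnecessary, which is exactly the block deleted above.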
88  cmd/usage.gotmpl  Normal file
@@ -0,0 +1,88 @@
+Usage:
+{{- if .Runnable }}  {{ .UseLine }}
+{{- end }}
+{{- if .HasAvailableSubCommands }}  {{ .CommandPath }} [command]
+{{- end }}
+
+{{- if gt (len .Aliases) 0}}
+
+Aliases:
+  {{ .NameAndAliases }}
+{{- end }}
+
+{{- if .HasExample }}
+
+Examples:
+{{ .Example }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+{{- if eq (len .Groups) 0}}
+
+Available Commands:
+{{- range .Commands }}
+{{- if or .IsAvailableCommand (eq .Name "help") }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+
+{{- else }}
+
+{{- range .Groups }}
+
+{{ .Title }}
+
+{{- range $.Commands }}
+{{- if and (eq .GroupID .ID) (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if not .AllChildCommandsHaveGroup }}
+
+Additional Commands:
+{{- range $.Commands }}
+{{- if and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableLocalFlags }}
+
+Flags:
+{{ .LocalFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- if .HasAvailableInheritedFlags }}
+
+Global Flags:
+{{ .InheritedFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- /* Hijack .Annotations for Environment Variables */ -}}
+{{- if .Annotations }}
+
+Environment Variables:
+{{- range $key, $value := .Annotations }}
+  {{ rpad $key 24 }} {{ $value | trimTrailingWhitespaces }}
+{{- end }}
+{{- end }}
+
+{{- if .HasHelpSubCommands }}
+
+Additional help topics:
+{{- range .Commands }}
+{{- if .IsAdditionalHelpTopicCommand }}
+  {{ rpad .CommandPath .CommandPathPadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+
+Use "{{ .CommandPath }} [command] --help" for more information about a command.
+{{- end }}
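As a rough illustration (not verbatim program output), a command annotated with envconfig.Usage("OLLAMA_HOST", "OLLAMA_NOHISTORY") should end its usage text approximately like this, with names padded to 24 columns by rpad and defaults appended by envconfig.Usage (descriptions and defaults taken from the envconfig table later in this diff):

Environment Variables:
  OLLAMA_HOST              IP Address for the ollama server (default: 127.0.0.1:11434)
  OLLAMA_NOHISTORY         Do not preserve readline history (default: false)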
@@ -67,7 +67,14 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 	slog.Info("discovering available GPUs...")
 
 	// Warn if any user-overrides are set which could lead to incorrect GPU discovery
-	overrideWarnings()
+	overrideWarning(
+		"CUDA_VISIBLE_DEVICES",
+		"HIP_VISIBLE_DEVICES",
+		"ROCR_VISIBLE_DEVICES",
+		"GGML_VK_VISIBLE_DEVICES",
+		"GPU_DEVICE_ORDINAL",
+		"HSA_OVERRIDE_GFX_VERSION",
+	)
 
 	requested := envconfig.LLMLibrary()
 	jetpack := cudaJetpack()
@@ -454,23 +461,20 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map
 	return devices
 }
 
-func overrideWarnings() {
-	anyFound := false
-	m := envconfig.AsMap()
-	for _, k := range []string{
-		"CUDA_VISIBLE_DEVICES",
-		"HIP_VISIBLE_DEVICES",
-		"ROCR_VISIBLE_DEVICES",
-		"GGML_VK_VISIBLE_DEVICES",
-		"GPU_DEVICE_ORDINAL",
-		"HSA_OVERRIDE_GFX_VERSION",
-	} {
-		if e, found := m[k]; found && e.Value != "" {
-			anyFound = true
-			slog.Warn("user overrode visible devices", k, e.Value)
+func overrideWarning(s ...string) {
+	attrs := make([]slog.Attr, 0, len(s))
+	for _, i := range envconfig.Lookup(s...) {
+		if !i.IsZero() {
+			attrs = append(attrs, i.LogValue().Group()...)
 		}
 	}
-	if anyFound {
-		slog.Warn("if GPUs are not correctly discovered, unset and try again")
+
+	if len(attrs) > 0 {
+		slog.LogAttrs(
+			context.TODO(),
+			slog.LevelWarn,
+			"user overrode visible devices; if GPUs are not correctly discovered, unset and try again",
+			attrs...,
+		)
 	}
 }
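The rewrite leans on log/slog's LogValuer machinery: each envconfig item resolves to a group-kinded slog.Value, Group() flattens that back into []slog.Attr, and a single LogAttrs call emits one warning carrying every override. A self-contained sketch of that shape, with an invented setting type and values standing in for envconfig's item:

// sketch: collect attrs from LogValuer values and emit one warning record.
package main

import (
	"context"
	"log/slog"
)

type setting struct{ name, value string }

// LogValue makes setting a slog.LogValuer, mirroring item.LogValue in envconfig.
func (s setting) LogValue() slog.Value {
	return slog.GroupValue(slog.String(s.name, s.value))
}

func main() {
	overrides := []setting{
		{"CUDA_VISIBLE_DEVICES", "0,1"},
		{"HSA_OVERRIDE_GFX_VERSION", ""}, // empty: skipped, like item.IsZero
	}

	attrs := make([]slog.Attr, 0, len(overrides))
	for _, s := range overrides {
		if s.value != "" { // stand-in for the !i.IsZero() check
			attrs = append(attrs, s.LogValue().Group()...)
		}
	}

	if len(attrs) > 0 {
		slog.LogAttrs(context.TODO(), slog.LevelWarn,
			"user overrode visible devices; if GPUs are not correctly discovered, unset and try again",
			attrs...)
	}
}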
@@ -8,7 +8,9 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
+	"reflect"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -265,67 +267,97 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
 
-type EnvVar struct {
-	Name        string
-	Value       any
-	Description string
+type item struct {
+	enable              bool
+	name, usage         string
+	value, defaultValue any
 }
 
-func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
-		"OLLAMA_KV_CACHE_TYPE":     {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
-		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
-		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
-		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
-		"OLLAMA_REMOTES":           {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
-
-		// Informational
-		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
-		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
-		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
-	}
-
-	if runtime.GOOS != "windows" {
-		// Windows environment variables are case-insensitive so there's no need to duplicate them
-		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
-		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
-		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
-	}
-
-	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible by numeric ID"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible by UUID or numeric ID"}
-		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
-	}
-
-	return ret
+func (i item) IsZero() bool {
+	return (i.value == i.defaultValue) || (i.defaultValue == nil && reflect.ValueOf(i.value).IsZero())
 }
 
-func Values() map[string]string {
-	vals := make(map[string]string)
-	for k, v := range AsMap() {
-		vals[k] = fmt.Sprintf("%v", v.Value)
+func (i item) LogValue() slog.Value {
+	return slog.GroupValue(slog.Any(i.name, i.value))
+}
+
+type slice []item
+
+func (s slice) LogValue() slog.Value {
+	attrs := make([]slog.Attr, 0, 2*len(s))
+	for _, e := range s {
+		attrs = append(attrs, e.LogValue().Group()...)
 	}
-	return vals
+	return slog.GroupValue(attrs...)
 }
 
+var all = slice{
+	{true, "OLLAMA_DEBUG", "Show additional debug information (e.g. OLLAMA_DEBUG=1). Verbosity increase with value", LogLevel(), nil},
+	{true, "OLLAMA_FLASH_ATTENTION", "Enable flash attention", FlashAttention(false), nil},
+	{true, "OLLAMA_KV_CACHE_TYPE", "Quantization type for the K/V cache", KvCacheType(), nil},
+	{true, "OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU (bytes)", GpuOverhead(), 0},
+	{true, "OLLAMA_HOST", "IP Address for the ollama server", Host(), "127.0.0.1:11434"},
+	{true, "OLLAMA_KEEP_ALIVE", "The duration that models stay loaded in memory", KeepAlive(), 5 * time.Minute},
+	{true, "OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
+	{true, "OLLAMA_LOAD_TIMEOUT", "How long to allow model loads to stall before giving up", LoadTimeout(), 5 * time.Minute},
+	{true, "OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), 0},
+	{true, "OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), 512},
+	{true, "OLLAMA_MODELS", "The path to the models directory", Models(), filepath.Join(os.Getenv("HOME"), ".ollama", "models")},
+	{true, "OLLAMA_NOHISTORY", "Do not preserve readline history", NoHistory(), false},
+	{true, "OLLAMA_NOPRUNE", "Do not prune model blobs on startup", NoPrune(), false},
+	{true, "OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests", NumParallel(), 1},
+	{true, "OLLAMA_ORIGINS", "A comma separated list of allowed origins", AllowedOrigins(), nil},
+	{true, "OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
+	{true, "OLLAMA_MULTIUSER_CACHE", "Optimize prompt caching for multi-user scenarios", MultiUserCache(), false},
+	{true, "OLLAMA_CONTEXT_LENGTH", "Context length to use unless otherwise specified", ContextLength(), 4096},
+	{true, "OLLAMA_NEW_ENGINE", "Enable the new Ollama engine", NewEngine(), false},
+	{true, "OLLAMA_REMOTES", "Allowed hosts for remote models", Remotes(), []string{"ollama.com"}},
+	{runtime.GOOS != "windows", "HTTP_PROXY", "HTTP proxy", String("http_proxy")(), nil},
+	{runtime.GOOS != "windows", "HTTPS_PROXY", "HTTPS proxy", String("https_proxy")(), nil},
+	{runtime.GOOS != "windows", "NO_PROXY", "No proxy", String("no_proxy")(), nil},
+	{runtime.GOOS != "darwin", "CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "HIP_VISIBLE_DEVICES", "Set which AMD devices are visible by numeric ID", HipVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible by UUID or numeric ID", RocrVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "GGML_VK_VISIBLE_DEVICES", "Set which Vulkan devices are visible by numeric ID", VkVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "GPU_DEVICE_ORDINAL", "Set which AMD devices are visible by numeric ID", GpuDeviceOrdinal(), nil},
+	{runtime.GOOS != "darwin", "HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
+}
+
+func Enabled() slice {
+	enabled := make(slice, 0, len(all))
+	for _, i := range all {
+		if i.enable {
+			enabled = append(enabled, i)
+		}
+	}
+	return enabled
+}
+
+func Lookup(s ...string) []item {
+	enabled := Enabled()
+	filtered := make([]item, 0, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(enabled, func(i item) bool { return i.name == k }); i != -1 {
+			filtered = append(filtered, enabled[i])
+		}
+	}
+	return filtered
+}
+
+// Usage returns enabled environment variables and their usage descriptions.
+// If a variable has a default value, it is included in the description.
+func Usage(s ...string) map[string]string {
+	enabled := Enabled()
+	m := make(map[string]string, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(enabled, func(i item) bool { return i.name == k }); i != -1 {
+			m[k] = enabled[i].usage
+			if enabled[i].defaultValue != nil {
+				m[k] += fmt.Sprintf(" (default: %v)", enabled[i].defaultValue)
+			}
+		}
+	}
+	return m
+}
+
 // Var returns an environment variable stripped of leading and trailing quotes or spaces
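The IsZero method above is the pivot for both the override warnings and the startup logging: a variable counts as unset when its value equals the declared default, or, when no default is declared (defaultValue == nil), when the value is the zero value of its dynamic type via reflect. A standalone sketch of just that rule, with illustrative values and a trimmed-down copy of the struct:

// sketch: the two-clause zero test, runnable on its own.
package main

import (
	"fmt"
	"reflect"
)

type item struct {
	name         string
	value        any
	defaultValue any
}

func (i item) IsZero() bool {
	return (i.value == i.defaultValue) ||
		(i.defaultValue == nil && reflect.ValueOf(i.value).IsZero())
}

func main() {
	fmt.Println(item{"OLLAMA_MAX_QUEUE", 512, 512}.IsZero())     // true: still the default
	fmt.Println(item{"OLLAMA_MAX_QUEUE", 10, 512}.IsZero())      // false: user overrode it
	fmt.Println(item{"CUDA_VISIBLE_DEVICES", "", nil}.IsZero())  // true: unset, no default
	fmt.Println(item{"CUDA_VISIBLE_DEVICES", "0", nil}.IsZero()) // false: user set it
}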
@@ -1541,7 +1541,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 
 func Serve(ln net.Listener) error {
 	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
-	slog.Info("server config", "env", envconfig.Values())
+	slog.Info("server environment configuration", "", envconfig.Enabled())
 
 	blobsDir, err := GetBlobsPath("")
 	if err != nil {
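The empty attribute key in the new slog.Info call is deliberate: slog inlines a group-valued attribute whose key is empty, so every enabled variable lands flat on the log record instead of nested under a name. A small sketch of that behavior, with invented values standing in for envconfig.Enabled():

// sketch: an empty-key group attribute is inlined by slog handlers.
package main

import (
	"log/slog"
	"os"
)

type settings struct{}

func (settings) LogValue() slog.Value {
	return slog.GroupValue(
		slog.String("OLLAMA_HOST", "127.0.0.1:11434"),
		slog.Bool("OLLAMA_NOHISTORY", false),
	)
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	// Prints (after a timestamp): level=INFO msg="server environment configuration"
	// OLLAMA_HOST=127.0.0.1:11434 OLLAMA_NOHISTORY=false
	logger.Info("server environment configuration", "", settings{})
}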