mirror of https://github.com/ollama/ollama.git
synced 2026-01-18 20:39:13 -05:00

Compare commits: parth/decr...mxyng/envi
1 commit
| Author | SHA1 | Date |
|---|---|---|
|  | 8135bff40d |  |
190  cmd/cmd.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"crypto/ed25519"
 	"crypto/rand"
+	_ "embed"
 	"encoding/json"
 	"encoding/pem"
 	"errors"
@@ -47,6 +48,9 @@ import (
 	"github.com/ollama/ollama/version"
 )
 
+//go:embed usage.gotmpl
+var usageTemplate string
+
 const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
 
 // ensureThinkingSupport emits a warning if the model does not advertise thinking support
@@ -1664,21 +1668,6 @@ func versionHandler(cmd *cobra.Command, _ []string) {
 	}
 }
 
-func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
-	if len(envs) == 0 {
-		return
-	}
-
-	envUsage := `
-Environment Variables:
-`
-	for _, e := range envs {
-		envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
-	}
-
-	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
-}
-
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false
@@ -1708,22 +1697,24 @@ func NewCLI() *cobra.Command {
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 
 	createCmd := &cobra.Command{
-		Use:     "create MODEL",
-		Short:   "Create a model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CreateHandler,
+		Use:         "create MODEL",
+		Short:       "Create a model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CreateHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
 
 	showCmd := &cobra.Command{
-		Use:     "show MODEL",
-		Short:   "Show information for a model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    ShowHandler,
+		Use:         "show MODEL",
+		Short:       "Show information for a model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ShowHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	showCmd.Flags().Bool("license", false, "Show license of a model")
@@ -1734,11 +1725,12 @@ func NewCLI() *cobra.Command {
 	showCmd.Flags().BoolP("verbose", "v", false, "Show detailed model information")
 
 	runCmd := &cobra.Command{
-		Use:     "run MODEL [PROMPT]",
-		Short:   "Run a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    RunHandler,
+		Use:         "run MODEL [PROMPT]",
+		Short:       "Run a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        RunHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST", "OLLAMA_NOHISTORY"),
 	}
 
 	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
@@ -1753,11 +1745,12 @@ func NewCLI() *cobra.Command {
 	runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
 
 	stopCmd := &cobra.Command{
-		Use:     "stop MODEL",
-		Short:   "Stop a running model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    StopHandler,
+		Use:         "stop MODEL",
+		Short:       "Stop a running model",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        StopHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	serveCmd := &cobra.Command{
@@ -1766,24 +1759,44 @@ func NewCLI() *cobra.Command {
 		Short: "Start ollama",
 		Args:  cobra.ExactArgs(0),
 		RunE:  RunServer,
+		Annotations: envconfig.Usage(
+			"OLLAMA_DEBUG",
+			"OLLAMA_HOST",
+			"OLLAMA_CONTEXT_LENGTH",
+			"OLLAMA_KEEP_ALIVE",
+			"OLLAMA_MAX_LOADED_MODELS",
+			"OLLAMA_MAX_QUEUE",
+			"OLLAMA_MODELS",
+			"OLLAMA_NUM_PARALLEL",
+			"OLLAMA_NOPRUNE",
+			"OLLAMA_ORIGINS",
+			"OLLAMA_SCHED_SPREAD",
+			"OLLAMA_FLASH_ATTENTION",
+			"OLLAMA_KV_CACHE_TYPE",
+			"OLLAMA_LLM_LIBRARY",
+			"OLLAMA_GPU_OVERHEAD",
+			"OLLAMA_LOAD_TIMEOUT",
+		),
 	}
 
 	pullCmd := &cobra.Command{
-		Use:     "pull MODEL",
-		Short:   "Pull a model from a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PullHandler,
+		Use:         "pull MODEL",
+		Short:       "Pull a model from a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PullHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	pushCmd := &cobra.Command{
-		Use:     "push MODEL",
-		Short:   "Push a model to a registry",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    PushHandler,
+		Use:         "push MODEL",
+		Short:       "Push a model to a registry",
+		Args:        cobra.ExactArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        PushHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
@@ -1805,33 +1818,37 @@ func NewCLI() *cobra.Command {
 	}
 
 	listCmd := &cobra.Command{
-		Use:     "list",
-		Aliases: []string{"ls"},
-		Short:   "List models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListHandler,
+		Use:         "list",
+		Aliases:     []string{"ls"},
+		Short:       "List models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	psCmd := &cobra.Command{
-		Use:     "ps",
-		Short:   "List running models",
-		PreRunE: checkServerHeartbeat,
-		RunE:    ListRunningHandler,
+		Use:         "ps",
+		Short:       "List running models",
+		PreRunE:     checkServerHeartbeat,
+		RunE:        ListRunningHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 	copyCmd := &cobra.Command{
-		Use:     "cp SOURCE DESTINATION",
-		Short:   "Copy a model",
-		Args:    cobra.ExactArgs(2),
-		PreRunE: checkServerHeartbeat,
-		RunE:    CopyHandler,
+		Use:         "cp SOURCE DESTINATION",
+		Short:       "Copy a model",
+		Args:        cobra.ExactArgs(2),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        CopyHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	deleteCmd := &cobra.Command{
-		Use:     "rm MODEL [MODEL...]",
-		Short:   "Remove a model",
-		Args:    cobra.MinimumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    DeleteHandler,
+		Use:         "rm MODEL [MODEL...]",
+		Short:       "Remove a model",
+		Args:        cobra.MinimumNArgs(1),
+		PreRunE:     checkServerHeartbeat,
+		RunE:        DeleteHandler,
+		Annotations: envconfig.Usage("OLLAMA_HOST"),
 	}
 
 	runnerCmd := &cobra.Command{
@@ -1846,50 +1863,6 @@ func NewCLI() *cobra.Command {
 		_ = runner.Execute(args[1:])
 	})
 
-	envVars := envconfig.AsMap()
-
-	envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
-
-	for _, cmd := range []*cobra.Command{
-		createCmd,
-		showCmd,
-		runCmd,
-		stopCmd,
-		pullCmd,
-		pushCmd,
-		listCmd,
-		psCmd,
-		copyCmd,
-		deleteCmd,
-		serveCmd,
-	} {
-		switch cmd {
-		case runCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
-		case serveCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{
-				envVars["OLLAMA_DEBUG"],
-				envVars["OLLAMA_HOST"],
-				envVars["OLLAMA_CONTEXT_LENGTH"],
-				envVars["OLLAMA_KEEP_ALIVE"],
-				envVars["OLLAMA_MAX_LOADED_MODELS"],
-				envVars["OLLAMA_MAX_QUEUE"],
-				envVars["OLLAMA_MODELS"],
-				envVars["OLLAMA_NUM_PARALLEL"],
-				envVars["OLLAMA_NOPRUNE"],
-				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
-				envVars["OLLAMA_FLASH_ATTENTION"],
-				envVars["OLLAMA_KV_CACHE_TYPE"],
-				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
-			})
-		default:
-			appendEnvDocs(cmd, envs)
-		}
-	}
-
 	rootCmd.AddCommand(
 		serveCmd,
 		createCmd,
@@ -1907,6 +1880,7 @@ func NewCLI() *cobra.Command {
 		runnerCmd,
 	)
 
+	rootCmd.SetUsageTemplate(usageTemplate)
	return rootCmd
 }
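Taken together, the cmd/cmd.go changes swap string concatenation (appendEnvDocs) for data: each command now carries its environment-variable docs in cobra's Annotations map, and the embedded usage.gotmpl (the new file below) renders them. A minimal standalone sketch of the same pattern, with a hypothetical command and variable (DEMO_HOST is invented; ollama fills this map via envconfig.Usage):

// sketch: a cobra command carrying env-var docs in Annotations, rendered by
// an Annotations-aware usage template (not ollama code).
package main

import "github.com/spf13/cobra"

func main() {
	cmd := &cobra.Command{
		Use:   "demo",
		Short: "Show Annotations-driven usage text",
		// Hypothetical key and description; ollama populates this with
		// envconfig.Usage("OLLAMA_HOST", ...).
		Annotations: map[string]string{
			"DEMO_HOST": "Address for the demo server (default: 127.0.0.1:8080)",
		},
		Run: func(*cobra.Command, []string) {},
	}
	// Append an Annotations section to the default template, the same move
	// usage.gotmpl makes wholesale (rpad is one of cobra's template functions).
	cmd.SetUsageTemplate(cmd.UsageTemplate() + `
Environment Variables:
{{- range $key, $value := .Annotations }}
  {{ rpad $key 24 }} {{ $value }}
{{- end }}
`)
	_ = cmd.Usage() // prints usage text including the env section
}

Because the data rides on the command itself, the per-command switch that previously dispatched appendEnvDocs calls becomes unnecessary, which is exactly the block deleted above.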
88  cmd/usage.gotmpl  Normal file
@@ -0,0 +1,88 @@
+Usage:
+{{- if .Runnable }}  {{ .UseLine }}
+{{- end }}
+{{- if .HasAvailableSubCommands }}  {{ .CommandPath }} [command]
+{{- end }}
+
+{{- if gt (len .Aliases) 0}}
+
+Aliases:
+  {{ .NameAndAliases }}
+{{- end }}
+
+{{- if .HasExample }}
+
+Examples:
+{{ .Example }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+{{- if eq (len .Groups) 0}}
+
+Available Commands:
+{{- range .Commands }}
+{{- if or .IsAvailableCommand (eq .Name "help") }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+
+{{- else }}
+
+{{- range .Groups }}
+
+{{ .Title }}
+
+{{- range $.Commands }}
+{{- if and (eq .GroupID .ID) (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if not .AllChildCommandsHaveGroup }}
+
+Additional Commands:
+{{- range $.Commands }}
+{{- if and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")) }}
+  {{ rpad .Name .NamePadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableLocalFlags }}
+
+Flags:
+{{ .LocalFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- if .HasAvailableInheritedFlags }}
+
+Global Flags:
+{{ .InheritedFlags.FlagUsages | trimTrailingWhitespaces }}
+{{- end }}
+
+{{- /* Hijack .Annotations for Environment Variables */ -}}
+{{- if .Annotations }}
+
+Environment Variables:
+{{- range $key, $value := .Annotations }}
+  {{ rpad $key 24 }} {{ $value | trimTrailingWhitespaces }}
+{{- end }}
+{{- end }}
+
+{{- if .HasHelpSubCommands }}
+
+Additional help topics:
+{{- range .Commands }}
+{{- if .IsAdditionalHelpTopicCommand }}
+  {{ rpad .CommandPath .CommandPathPadding }} {{ .Short }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{- if .HasAvailableSubCommands }}
+
+Use "{{ .CommandPath }} [command] --help" for more information about a command.
+{{- end }}
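As a rough illustration (not verbatim program output), a command annotated with envconfig.Usage("OLLAMA_HOST", "OLLAMA_NOHISTORY") should end its usage text approximately like this, with names padded to 24 columns by rpad and defaults appended by envconfig.Usage (descriptions and defaults taken from the envconfig table later in this diff):

Environment Variables:
  OLLAMA_HOST              IP Address for the ollama server (default: 127.0.0.1:11434)
  OLLAMA_NOHISTORY         Do not preserve readline history (default: false)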
@@ -67,7 +67,14 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 	slog.Info("discovering available GPUs...")
 
 	// Warn if any user-overrides are set which could lead to incorrect GPU discovery
-	overrideWarnings()
+	overrideWarning(
+		"CUDA_VISIBLE_DEVICES",
+		"HIP_VISIBLE_DEVICES",
+		"ROCR_VISIBLE_DEVICES",
+		"GGML_VK_VISIBLE_DEVICES",
+		"GPU_DEVICE_ORDINAL",
+		"HSA_OVERRIDE_GFX_VERSION",
+	)
 
 	requested := envconfig.LLMLibrary()
 	jetpack := cudaJetpack()
@@ -454,23 +461,20 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs map
 	return devices
 }
 
-func overrideWarnings() {
-	anyFound := false
-	m := envconfig.AsMap()
-	for _, k := range []string{
-		"CUDA_VISIBLE_DEVICES",
-		"HIP_VISIBLE_DEVICES",
-		"ROCR_VISIBLE_DEVICES",
-		"GGML_VK_VISIBLE_DEVICES",
-		"GPU_DEVICE_ORDINAL",
-		"HSA_OVERRIDE_GFX_VERSION",
-	} {
-		if e, found := m[k]; found && e.Value != "" {
-			anyFound = true
-			slog.Warn("user overrode visible devices", k, e.Value)
+func overrideWarning(s ...string) {
+	attrs := make([]slog.Attr, 0, len(s))
+	for _, i := range envconfig.Lookup(s...) {
+		if !i.IsZero() {
+			attrs = append(attrs, i.LogValue().Group()...)
 		}
 	}
-	if anyFound {
-		slog.Warn("if GPUs are not correctly discovered, unset and try again")
+
+	if len(attrs) > 0 {
+		slog.LogAttrs(
+			context.TODO(),
+			slog.LevelWarn,
+			"user overrode visible devices; if GPUs are not correctly discovered, unset and try again",
+			attrs...,
+		)
 	}
 }
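The rewrite leans on log/slog's LogValuer machinery: each envconfig item resolves to a group-kinded slog.Value, Group() flattens that back into []slog.Attr, and a single LogAttrs call emits one warning carrying every override. A self-contained sketch of that shape, with an invented setting type and values standing in for envconfig's item:

// sketch: collect attrs from LogValuer values and emit one warning record.
package main

import (
	"context"
	"log/slog"
)

type setting struct{ name, value string }

// LogValue makes setting a slog.LogValuer, mirroring item.LogValue in envconfig.
func (s setting) LogValue() slog.Value {
	return slog.GroupValue(slog.String(s.name, s.value))
}

func main() {
	overrides := []setting{
		{"CUDA_VISIBLE_DEVICES", "0,1"},
		{"HSA_OVERRIDE_GFX_VERSION", ""}, // empty: skipped, like item.IsZero
	}

	attrs := make([]slog.Attr, 0, len(overrides))
	for _, s := range overrides {
		if s.value != "" { // stand-in for the !i.IsZero() check
			attrs = append(attrs, s.LogValue().Group()...)
		}
	}

	if len(attrs) > 0 {
		slog.LogAttrs(context.TODO(), slog.LevelWarn,
			"user overrode visible devices; if GPUs are not correctly discovered, unset and try again",
			attrs...)
	}
}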
@@ -8,7 +8,9 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
+	"reflect"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -265,67 +267,97 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
 // Set aside VRAM per GPU
 var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
 
-type EnvVar struct {
-	Name        string
-	Value       any
-	Description string
+type item struct {
+	enable              bool
+	name, usage         string
+	value, defaultValue any
 }
 
-func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
-		"OLLAMA_KV_CACHE_TYPE":     {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
-		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
-		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
-		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
-		"OLLAMA_REMOTES":           {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
-
-		// Informational
-		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
-		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
-		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
-	}
-
-	if runtime.GOOS != "windows" {
-		// Windows environment variables are case-insensitive so there's no need to duplicate them
-		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
-		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
-		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
-	}
-
-	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible by numeric ID"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible by UUID or numeric ID"}
-		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_VULKAN"] = EnvVar{"OLLAMA_VULKAN", EnableVulkan(), "Enable experimental Vulkan support"}
-	}
-
-	return ret
+func (i item) IsZero() bool {
+	return (i.value == i.defaultValue) || (i.defaultValue == nil && reflect.ValueOf(i.value).IsZero())
 }
 
-func Values() map[string]string {
-	vals := make(map[string]string)
-	for k, v := range AsMap() {
-		vals[k] = fmt.Sprintf("%v", v.Value)
+func (i item) LogValue() slog.Value {
+	return slog.GroupValue(slog.Any(i.name, i.value))
+}
+
+type slice []item
+
+func (s slice) LogValue() slog.Value {
+	attrs := make([]slog.Attr, 0, 2*len(s))
+	for _, e := range s {
+		attrs = append(attrs, e.LogValue().Group()...)
 	}
-	return vals
+	return slog.GroupValue(attrs...)
 }
 
+var all = slice{
+	{true, "OLLAMA_DEBUG", "Show additional debug information (e.g. OLLAMA_DEBUG=1). Verbosity increase with value", LogLevel(), nil},
+	{true, "OLLAMA_FLASH_ATTENTION", "Enable flash attention", FlashAttention(false), nil},
+	{true, "OLLAMA_KV_CACHE_TYPE", "Quantization type for the K/V cache", KvCacheType(), nil},
+	{true, "OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU (bytes)", GpuOverhead(), 0},
+	{true, "OLLAMA_HOST", "IP Address for the ollama server", Host(), "127.0.0.1:11434"},
+	{true, "OLLAMA_KEEP_ALIVE", "The duration that models stay loaded in memory", KeepAlive(), 5 * time.Minute},
+	{true, "OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
+	{true, "OLLAMA_LOAD_TIMEOUT", "How long to allow model loads to stall before giving up", LoadTimeout(), 5 * time.Minute},
+	{true, "OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), 0},
+	{true, "OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), 512},
+	{true, "OLLAMA_MODELS", "The path to the models directory", Models(), filepath.Join(os.Getenv("HOME"), ".ollama", "models")},
+	{true, "OLLAMA_NOHISTORY", "Do not preserve readline history", NoHistory(), false},
+	{true, "OLLAMA_NOPRUNE", "Do not prune model blobs on startup", NoPrune(), false},
+	{true, "OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests", NumParallel(), 1},
+	{true, "OLLAMA_ORIGINS", "A comma separated list of allowed origins", AllowedOrigins(), nil},
+	{true, "OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
+	{true, "OLLAMA_MULTIUSER_CACHE", "Optimize prompt caching for multi-user scenarios", MultiUserCache(), false},
+	{true, "OLLAMA_CONTEXT_LENGTH", "Context length to use unless otherwise specified", ContextLength(), 4096},
+	{true, "OLLAMA_NEW_ENGINE", "Enable the new Ollama engine", NewEngine(), false},
+	{true, "OLLAMA_REMOTES", "Allowed hosts for remote models", Remotes(), []string{"ollama.com"}},
+	{runtime.GOOS != "windows", "HTTP_PROXY", "HTTP proxy", String("http_proxy")(), nil},
+	{runtime.GOOS != "windows", "HTTPS_PROXY", "HTTPS proxy", String("https_proxy")(), nil},
+	{runtime.GOOS != "windows", "NO_PROXY", "No proxy", String("no_proxy")(), nil},
+	{runtime.GOOS != "darwin", "CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "HIP_VISIBLE_DEVICES", "Set which AMD devices are visible by numeric ID", HipVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible by UUID or numeric ID", RocrVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "GGML_VK_VISIBLE_DEVICES", "Set which Vulkan devices are visible by numeric ID", VkVisibleDevices(), nil},
+	{runtime.GOOS != "darwin", "GPU_DEVICE_ORDINAL", "Set which AMD devices are visible by numeric ID", GpuDeviceOrdinal(), nil},
+	{runtime.GOOS != "darwin", "HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
+}
+
+func Enabled() slice {
+	enabled := make(slice, 0, len(all))
+	for _, i := range all {
+		if i.enable {
+			enabled = append(enabled, i)
+		}
+	}
+	return enabled
+}
+
+func Lookup(s ...string) []item {
+	enabled := Enabled()
+	filtered := make([]item, 0, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(enabled, func(i item) bool { return i.name == k }); i != -1 {
+			filtered = append(filtered, enabled[i])
+		}
+	}
+	return filtered
+}
+
+// Usage returns enabled environment variables and their usage descriptions.
+// If a variable has a default value, it is included in the description.
+func Usage(s ...string) map[string]string {
+	enabled := Enabled()
+	m := make(map[string]string, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(enabled, func(i item) bool { return i.name == k }); i != -1 {
+			m[k] = enabled[i].usage
+			if enabled[i].defaultValue != nil {
+				m[k] += fmt.Sprintf(" (default: %v)", enabled[i].defaultValue)
+			}
+		}
+	}
+	return m
+}
+
 // Var returns an environment variable stripped of leading and trailing quotes or spaces
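The IsZero method above is the pivot for both the override warnings and the startup logging: a variable counts as unset when its value equals the declared default, or, when no default is declared (defaultValue == nil), when the value is the zero value of its dynamic type via reflect. A standalone sketch of just that rule, with illustrative values and a trimmed-down copy of the struct:

// sketch: the two-clause zero test, runnable on its own.
package main

import (
	"fmt"
	"reflect"
)

type item struct {
	name         string
	value        any
	defaultValue any
}

func (i item) IsZero() bool {
	return (i.value == i.defaultValue) ||
		(i.defaultValue == nil && reflect.ValueOf(i.value).IsZero())
}

func main() {
	fmt.Println(item{"OLLAMA_MAX_QUEUE", 512, 512}.IsZero())     // true: still the default
	fmt.Println(item{"OLLAMA_MAX_QUEUE", 10, 512}.IsZero())      // false: user overrode it
	fmt.Println(item{"CUDA_VISIBLE_DEVICES", "", nil}.IsZero())  // true: unset, no default
	fmt.Println(item{"CUDA_VISIBLE_DEVICES", "0", nil}.IsZero()) // false: user set it
}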
@@ -1541,7 +1541,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 
 func Serve(ln net.Listener) error {
 	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
-	slog.Info("server config", "env", envconfig.Values())
+	slog.Info("server environment configuration", "", envconfig.Enabled())
 
 	blobsDir, err := GetBlobsPath("")
 	if err != nil {
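The empty attribute key in the new slog.Info call is deliberate: slog inlines a group-valued attribute whose key is empty, so every enabled variable lands flat on the log record instead of nested under a name. A small sketch of that behavior, with invented values standing in for envconfig.Enabled():

// sketch: an empty-key group attribute is inlined by slog handlers.
package main

import (
	"log/slog"
	"os"
)

type settings struct{}

func (settings) LogValue() slog.Value {
	return slog.GroupValue(
		slog.String("OLLAMA_HOST", "127.0.0.1:11434"),
		slog.Bool("OLLAMA_NOHISTORY", false),
	)
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	// Prints (after a timestamp): level=INFO msg="server environment configuration"
	// OLLAMA_HOST=127.0.0.1:11434 OLLAMA_NOHISTORY=false
	logger.Info("server environment configuration", "", settings{})
}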