Compare commits

7 Commits
api ... v0.1.1

Author SHA1 Message Date
Patrick Devine
1852755154 show a default message when license/parameters/system prompt/template aren't specified (#681) 2023-10-02 14:34:52 -07:00
Bruce MacDonald
b1f7123301 clean up num_gpu calculation code (#673) 2023-10-02 14:53:42 -04:00
Bruce MacDonald
1fbf3585d6 Relay default values to llama runner (#672)
* include seed in params for llama.cpp server and remove empty filter for temp

* relay default predict options to llama.cpp

- reorganize options to match predict request for readability

* omit empty stop

---------

Co-authored-by: hallh <hallh@users.noreply.github.com>
2023-10-02 14:53:16 -04:00
Patrick Devine
99d5161e8a don't wordwrap when stdout is redirected or piped (#662) 2023-10-02 11:50:55 -07:00
Michael
ea8380be45 add community project: Chatbot Ollama
add community project: Chatbot Ollama by @ivanfioravanti
2023-10-02 09:04:31 -07:00
Jeffrey Morgan
4f25092dc1 fix build_docker.sh permissions 2023-10-01 16:42:32 -07:00
Jiayu Liu
4fc10acce9 add some missing code directives in docs (#664) 2023-10-01 11:51:01 -07:00
11 changed files with 129 additions and 214 deletions

View File

@@ -217,6 +217,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)

View File

@@ -31,22 +31,6 @@ func (e StatusError) Error() string {
}
}
// /api/chat
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
}
type ChatResponse struct {
CreatedAt time.Time `json:"created_at"`
Message Message `json:"message"`
}
type GenerateRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -296,38 +280,38 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
func DefaultOptions() Options {
return Options{
Seed: -1,
UseNUMA: false,
NumCtx: 2048,
NumKeep: -1,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
LowVRAM: false,
F16KV: true,
UseMMap: true,
UseMLock: false,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
EmbeddingOnly: true,
RepeatLastN: 64,
RepeatPenalty: 1.1,
FrequencyPenalty: 0.0,
PresencePenalty: 0.0,
// options set on request to runner
NumPredict: -1,
NumKeep: -1,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,
TFSZ: 1.0,
TypicalP: 1.0,
RepeatLastN: 64,
RepeatPenalty: 1.1,
PresencePenalty: 0.0,
FrequencyPenalty: 0.0,
Mirostat: 0,
MirostatTau: 5.0,
MirostatEta: 0.1,
PenalizeNewline: true,
Seed: -1,
NumThread: 0, // let the runtime decide
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
}
}
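
The reorganized defaults above are grouped by when they take effect: per-request sampling options first, then options fixed when the model is loaded. A minimal sketch of layering a couple of overrides on top of DefaultOptions; the field names come from the diff, but the import path is an assumption for this point in the project's history, and FromMap (named in the hunk header) is only mentioned, not exercised:

```go
package main

import (
	"fmt"

	"github.com/jmorganca/ollama/api" // assumed module path at this time
)

func main() {
	opts := api.DefaultOptions()
	opts.Temperature = 0.2 // request-time sampling option
	opts.NumCtx = 4096     // load-time option

	// FromMap, referenced in the hunk header, presumably does the same for
	// map[string]interface{} input such as options decoded from JSON.
	fmt.Printf("temperature=%v num_ctx=%v seed=%v\n",
		opts.Temperature, opts.NumCtx, opts.Seed)
}
```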

View File

@@ -380,7 +380,20 @@ func pull(model string, insecure bool) error {
func RunGenerate(cmd *cobra.Command, args []string) error {
if len(args) > 1 {
// join all args into a single prompt
return generate(cmd, args[0], strings.Join(args[1:], " "))
wordWrap := false
if term.IsTerminal(int(os.Stdout.Fd())) {
wordWrap = true
}
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap)
}
if readline.IsTerminal(int(os.Stdin.Fd())) {
@@ -392,7 +405,7 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
type generateContextKey string
func generate(cmd *cobra.Command, model, prompt string) error {
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
client, err := api.FromEnv()
if err != nil {
return err
@@ -408,24 +421,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
generateContext = []int{}
}
var wrapTerm bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wrapTerm = true
}
termWidth, _, err := term.GetSize(int(0))
if err != nil {
wrapTerm = false
}
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wrapTerm = false
wordWrap = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
@@ -452,7 +450,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
latest = response
if wrapTerm {
if wordWrap {
for _, ch := range response.Response {
if currentLineLength+1 > termWidth-5 {
// backtrack the length of the last word and clear to the end of the line
@@ -533,7 +531,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
// load the model
if err := generate(cmd, model, ""); err != nil {
if err := generate(cmd, model, "", false); err != nil {
return err
}
@@ -579,6 +577,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
defer scanner.Close()
var wordWrap bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wordWrap = true
}
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
var multiLineBuffer string
var isMultiLine bool
@@ -632,10 +645,10 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "nohistory":
scanner.HistoryDisable()
case "wordwrap":
cmd.Flags().Set("nowordwrap", "false")
wordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
cmd.Flags().Set("nowordwrap", "true")
wordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
cmd.Flags().Set("verbose", "true")
@@ -673,15 +686,31 @@ func generateInteractive(cmd *cobra.Command, model string) error {
switch args[1] {
case "license":
fmt.Println(resp.License)
if resp.License == "" {
fmt.Println("No license was specified for this model.\n")
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
if resp.Parameters == "" {
fmt.Println("No parameters were specified for this model.\n")
} else {
fmt.Println(resp.Parameters)
}
case "system":
fmt.Println(resp.System)
if resp.System == "" {
fmt.Println("No system prompt was specified for this model.\n")
} else {
fmt.Println(resp.System)
}
case "template":
fmt.Println(resp.Template)
if resp.Template == "" {
fmt.Println("No prompt template was specified for this model.\n")
} else {
fmt.Println(resp.Template)
}
default:
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
@@ -698,7 +727,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line); err != nil {
if err := generate(cmd, model, line, wordWrap); err != nil {
return err
}
}
@@ -710,7 +739,7 @@ func generateBatch(cmd *cobra.Command, model string) error {
for scanner.Scan() {
prompt := scanner.Text()
fmt.Printf(">>> %s\n", prompt)
if err := generate(cmd, model, prompt); err != nil {
if err := generate(cmd, model, prompt, false); err != nil {
return err
}
}
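
The word-wrap change in this file (commit 99d5161e8a) boils down to one check: wrapping defaults to on only when stdout is an interactive terminal. A standalone sketch of that check, using what is presumably the golang.org/x/term package behind the diff's term.IsTerminal call:

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/term"
)

func main() {
	// Mirrors the diff above: word wrap is enabled only when stdout is a
	// terminal, so redirected or piped output is left unwrapped.
	wordWrap := term.IsTerminal(int(os.Stdout.Fd()))
	fmt.Println("word wrap enabled:", wordWrap)
}
```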

View File

@@ -10,25 +10,25 @@ Install required tools:
- go version 1.20 or higher
- gcc version 11.4.0 or higher
```
```bash
brew install go cmake gcc
```
Get the required libraries:
```
```bash
go generate ./...
```
Then build ollama:
```
```bash
go build .
```
Now you can run `ollama`:
```
```bash
./ollama
```

View File

@@ -2,13 +2,13 @@
## How can I expose the Ollama server?
```
```bash
OLLAMA_HOST=0.0.0.0:11435 ollama serve
```
By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
```
```bash
OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
```
@@ -16,4 +16,3 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
* macOS: Raw model data is stored under `~/.ollama/models`.
* Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`

View File

@@ -2,7 +2,7 @@
> Note: A one line installer for Ollama is available by running:
>
> ```
> ```bash
> curl https://ollama.ai/install.sh | sh
> ```
@@ -10,7 +10,7 @@
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
@@ -19,13 +19,13 @@ sudo chmod +x /usr/bin/ollama
Start Ollama by running `ollama serve`:
```
```bash
ollama serve
```
Once Ollama is running, run a model in another terminal session:
```
```bash
ollama run llama2
```
@@ -35,7 +35,7 @@ ollama run llama2
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```
```bash
nvidia-smi
```
@@ -43,7 +43,7 @@ nvidia-smi
Create a user for Ollama:
```
```bash
sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
```
@@ -68,7 +68,7 @@ WantedBy=default.target
Then start the service:
```
```bash
sudo systemctl daemon-reload
sudo systemctl enable ollama
```
@@ -77,7 +77,7 @@ sudo systemctl enable ollama
To view logs of Ollama running as a startup service, run:
```
```bash
journalctl -u ollama
```

View File

@@ -44,7 +44,7 @@ INSTRUCTION arguments
An example of a model file creating a mario blueprint:
```
```modelfile
FROM llama2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
@@ -70,13 +70,13 @@ More examples are available in the [examples directory](../examples).
The FROM instruction defines the base model to use when creating a model.
```
```modelfile
FROM <model name>:<tag>
```
#### Build from llama2
```
```modelfile
FROM llama2
```
@@ -85,7 +85,7 @@ A list of available base models:
#### Build from a bin file
```
```modelfile
FROM ./ollama-model.bin
```
@@ -95,7 +95,7 @@ This bin file location should be specified as an absolute path or relative to th
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
```modelfile
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
@@ -106,7 +106,7 @@ EMBED <path to directory>/*.txt
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
```
```modelfile
PARAMETER <parameter> <parametervalue>
```
@@ -142,7 +142,7 @@ PARAMETER <parameter> <parametervalue>
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```
```modelfile
TEMPLATE """
{{- if .First }}
### System:
@@ -162,7 +162,7 @@ SYSTEM """<system message>"""
The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
```
```modelfile
SYSTEM """<system message>"""
```
@@ -170,7 +170,7 @@ SYSTEM """<system message>"""
The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
```
```modelfile
ADAPTER ./ollama-lora.bin
```
@@ -178,7 +178,7 @@ ADAPTER ./ollama-lora.bin
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
```
```modelfile
LICENSE """
<license text>
"""

View File

@@ -218,7 +218,6 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
n := 1 // default to enable metal on macOS
if runtime.GOOS == "linux" {
vramMib, err := CheckVRAM()
if err != nil {
@@ -235,10 +234,11 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
bytesPerLayer := fileSizeBytes / numLayer
// set n to the max number of layers we can fit in VRAM
return int(totalVramBytes / bytesPerLayer)
// max number of layers we can fit in VRAM
layers := int(totalVramBytes / bytesPerLayer)
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, n)
return layers
}
// default to enable metal on macOS
return 1
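
To make the heuristic above concrete with hypothetical numbers: a 3.8 GiB model file split across 32 layers comes to roughly 119 MiB per layer, so 8192 MiB of reported VRAM allows about 68 layers, more than the model has, and every layer can be offloaded. The same arithmetic as a sketch:

```go
package main

import "fmt"

func main() {
	// Hypothetical inputs for the bytes-per-layer heuristic shown above.
	fileSizeBytes := int64(3800) * 1024 * 1024 // ~3.8 GiB model file
	numLayer := int64(32)
	vramMib := int64(8192) // as reported by CheckVRAM on Linux

	bytesPerLayer := fileSizeBytes / numLayer
	totalVramBytes := vramMib * 1024 * 1024
	layers := int(totalVramBytes / bytesPerLayer)
	fmt.Printf("%d MiB VRAM available, loading up to %d GPU layers\n", vramMib, layers)
	// Prints: 8192 MiB VRAM available, loading up to 68 GPU layers
}
```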
@@ -417,28 +417,25 @@ type Prediction struct {
}
type PredictRequest struct {
Stream bool `json:"stream"`
NPredict int `json:"n_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TfsZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNl bool `json:"penalize_nl,omitempty"`
NKeep int `json:"n_keep,omitempty"`
Seed int `json:"seed,omitempty"`
Prompt string `json:"prompt,omitempty"`
NProbs int `json:"n_probs,omitempty"`
LogitBias map[int]float32 `json:"logit_bias,omitempty"`
IgnoreEos bool `json:"ignore_eos,omitempty"`
Stop []string `json:"stop,omitempty"`
Prompt string `json:"prompt"`
Stream bool `json:"stream"`
NPredict int `json:"n_predict"`
NKeep int `json:"n_keep"`
Temperature float32 `json:"temperature"`
TopK int `json:"top_k"`
TopP float32 `json:"top_p"`
TfsZ float32 `json:"tfs_z"`
TypicalP float32 `json:"typical_p"`
RepeatLastN int `json:"repeat_last_n"`
RepeatPenalty float32 `json:"repeat_penalty"`
PresencePenalty float32 `json:"presence_penalty"`
FrequencyPenalty float32 `json:"frequency_penalty"`
Mirostat int `json:"mirostat"`
MirostatTau float32 `json:"mirostat_tau"`
MirostatEta float32 `json:"mirostat_eta"`
PenalizeNl bool `json:"penalize_nl"`
Seed int `json:"seed"`
Stop []string `json:"stop,omitempty"`
}
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
@@ -470,8 +467,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
MirostatTau: llm.MirostatTau,
MirostatEta: llm.MirostatEta,
PenalizeNl: llm.PenalizeNewline,
Seed: llm.Seed,
Stop: llm.Stop,
}
data, err := json.Marshal(predReq)
if err != nil {
return fmt.Errorf("error marshaling data: %v", err)
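
The omitempty removal above is the substance of commit 1fbf3585d6: with omitempty, zero-valued defaults were silently dropped from the request and llama.cpp fell back to its own settings; without it, Ollama's defaults are always relayed to the runner. A minimal sketch of the difference, with the field set trimmed to two hypothetical stand-ins:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Before: zero values are omitted from the serialized request.
type withOmitEmpty struct {
	Temperature float32 `json:"temperature,omitempty"`
	Seed        int     `json:"seed,omitempty"`
}

// After: every field is sent, even when it holds the zero value.
type withoutOmitEmpty struct {
	Temperature float32 `json:"temperature"`
	Seed        int     `json:"seed"`
}

func main() {
	a, _ := json.Marshal(withOmitEmpty{})
	fmt.Println(string(a)) // {}  (zero values never reach the runner)

	b, _ := json.Marshal(withoutOmitEmpty{})
	fmt.Println(string(b)) // {"temperature":0,"seed":0}  (defaults are relayed)
}
```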

scripts/build_docker.sh Normal file → Executable file
View File

View File

@@ -54,54 +54,6 @@ type Model struct {
Embeddings []vector.Embedding
}
func (m *Model) ChatPrompt(messages []api.Message) (string, error) {
tmpl, err := template.New("").Parse(m.Template)
if err != nil {
return "", err
}
var vars struct {
System string
Prompt string
First bool
}
vars.First = true
var sb strings.Builder
flush := func() {
tmpl.Execute(&sb, vars)
vars.System = ""
vars.Prompt = ""
}
// build the chat history from messages
for _, m := range messages {
if m.Role == "system" {
if vars.System != "" {
flush()
}
vars.System = m.Content
}
if m.Role == "user" {
if vars.Prompt != "" {
flush()
}
vars.Prompt = m.Content
}
if m.Role == "assistant" {
flush()
sb.Write([]byte(m.Content))
}
}
flush()
return sb.String(), nil
}
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
t := m.Template
if request.Template != "" {
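
The ChatPrompt method removed above builds a single prompt from a message history by accumulating system and user content into template variables and flushing them through the model template whenever a variable would be overwritten or an assistant turn appears. A standalone sketch of that flush pattern with a hypothetical template and hard-coded messages; the real code parses m.Template and takes []api.Message:

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

func main() {
	// Hypothetical template standing in for m.Template.
	tmpl := template.Must(template.New("").Parse(
		"{{ if .System }}### System:\n{{ .System }}\n{{ end }}### User:\n{{ .Prompt }}\n### Response:\n"))

	var vars struct {
		System string
		Prompt string
		First  bool
	}
	vars.First = true

	var sb strings.Builder
	flush := func() {
		tmpl.Execute(&sb, vars)
		vars.System = ""
		vars.Prompt = ""
	}

	messages := []struct{ Role, Content string }{
		{"system", "You are a helpful assistant."},
		{"user", "Why is the sky blue?"},
		{"assistant", "Because of Rayleigh scattering."},
		{"user", "Explain it to a five year old."},
	}

	for _, m := range messages {
		switch m.Role {
		case "system":
			if vars.System != "" {
				flush() // a second system message forces a render first
			}
			vars.System = m.Content
		case "user":
			if vars.Prompt != "" {
				flush() // back-to-back user messages each get their own turn
			}
			vars.Prompt = m.Content
		case "assistant":
			flush() // render the pending turn, then append the reply verbatim
			sb.WriteString(m.Content)
		}
	}
	flush()
	fmt.Println(sb.String())
}
```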

View File

@@ -156,54 +156,6 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
return nil
}
func ChatModelHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
var req api.ChatRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
model, err := GetModel(req.Model)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
prompt, err := model.ChatPrompt(req.Messages)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
var response string
fn := func(r api.GenerateResponse) {
response += r.Response
}
workDir := c.GetString("workDir")
if err := load(c.Request.Context(), workDir, model, nil, defaultSessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
fmt.Println(prompt)
if err := loaded.llm.Predict(c.Request.Context(), []int{}, prompt, fn); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
}
c.JSON(http.StatusOK, api.ChatResponse{
Message: api.Message{
Role: "assistant",
Content: response,
},
CreatedAt: time.Now().UTC(),
})
}
func GenerateHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
@@ -600,7 +552,6 @@ func Serve(ln net.Listener, allowOrigins []string) error {
},
)
r.POST("/api/chat", ChatModelHandler)
r.POST("/api/pull", PullModelHandler)
r.POST("/api/generate", GenerateHandler)
r.POST("/api/embeddings", EmbeddingHandler)