Compare commits


113 Commits

Author SHA1 Message Date
Patrick Devine
7550fd1b7f use a pulsating spinner 2023-11-20 17:27:53 -08:00
Jeffrey Morgan
a3fcecf943 only set main_gpu if value > 0 is provided 2023-11-20 19:54:04 -05:00
Jeffrey Morgan
df07e4a097 remove redundant filename parameter (#1213) 2023-11-20 17:05:36 -05:00
Michael Yang
0b7ade0d4c Merge pull request #1212 from jmorganca/mxyng/metal
enable metal for fp32, q5_0, q5_1
2023-11-20 13:56:39 -08:00
Michael Yang
19b7a4d715 recent llama.cpp update added kernels for fp32, q5_0, and q5_1 2023-11-20 13:44:31 -08:00
Bruce MacDonald
31ab453d37 resolve FROM path before sending modelfile (#1211) 2023-11-20 16:43:48 -05:00
Jeffrey Morgan
35c4b5ec16 calculate hash separately from http request 2023-11-20 15:45:11 -05:00
James Braza
f24741ff39 Documenting how to view Modelfiles (#723)
* Documented viewing Modelfiles in ollama.ai/library

* Moved Modelfile in ollama.ai down per request
2023-11-20 15:24:29 -05:00
Jeffrey Morgan
8c4022b06b fix initial progress stats 2023-11-20 14:33:46 -05:00
Jeffrey Morgan
433702f421 hide progress stats on completion 2023-11-20 14:22:39 -05:00
Jeffrey Morgan
6066c70edd restore progress messages for older endpoints 2023-11-20 11:37:17 -05:00
Jeffrey Morgan
f10ac5de19 restore stats updated every second to progress bar 2023-11-20 10:58:19 -05:00
Jeffrey Morgan
93a108214c only show decimal points for smaller file size numbers 2023-11-20 10:58:19 -05:00
Purinda Gunasekara
be61a81758 main-gpu argument is not getting passed to llamacpp, fixed. (#1192) 2023-11-20 10:52:52 -05:00
Toni Soriano
2fdf1b5ff8 add laravel package to README.md (#1208)
Co-authored-by: Toni <cloudstudio@Tonis-Mac-mini.local>
2023-11-20 10:48:35 -05:00
Huy Le
331068b964 Adding ogpt.nvim into the list of plugins! (#1190)
* adding ollama.nvim for visibility

* adding an ogpt.nvim neovim plugin
2023-11-20 10:39:14 -05:00
Andy Brenneke
0179d8eb6b Add Rivet to Community Integrations (#1183) 2023-11-20 10:36:47 -05:00
Eli Bendersky
be48741308 README: link to LangChainGo for talking to ollama, with an example (#1206) 2023-11-20 10:35:07 -05:00
Jeffrey Morgan
6bbd6e26fb fix temporary newline created and removed with spinner in ollama run 2023-11-20 00:49:08 -05:00
Jeffrey Morgan
e6ad4813d3 dont crash when redirecting stderr 2023-11-19 23:50:45 -05:00
Jeffrey Morgan
13ba6df5ab enable cpu instructions on intel macs 2023-11-19 23:20:26 -05:00
Jeffrey Morgan
9d73d3a6b5 add back part.Reset() 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
72cd336410 dont retry on upload complete context cancel 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
1bd594b2fa revert to using one open file for blob uploads 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
9a8c21ac3d use exponential everywhere 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
f6b317e8c9 fix sending too little data in chunk upload body 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
ac5076ce1e exponential backoff up to 30s 2023-11-19 14:32:19 -05:00
Michael Yang
42c2e3a624 upload: retry complete upload 2023-11-19 14:32:19 -05:00
Michael Yang
cb42589792 adjust download/upload parts 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
258addc799 fix comment in progress.go 2023-11-19 13:46:19 -05:00
Jeffrey Morgan
c06b9b7304 update progress rendering to be closer to v0.1.10 2023-11-19 13:43:21 -05:00
Jeffrey Morgan
95b9acd324 improve pull percentage rendering 2023-11-19 11:00:43 -05:00
Jeffrey Morgan
04cbf5ccc0 progress bar styling improvements 2023-11-19 09:54:33 -05:00
Jeffrey Morgan
e1d7056496 update progress statuses 2023-11-19 09:21:13 -05:00
Jeffrey Morgan
02524a56ff check retry for authorization error 2023-11-19 00:19:53 -05:00
Jeffrey Morgan
1657c6abc7 add note to specify JSON in the prompt when using JSON mode 2023-11-18 22:59:26 -05:00
Jeffrey Morgan
12e046f12a remove unused function 2023-11-18 22:16:51 -05:00
Jeffrey Morgan
36a3bbf65f Update llm/llama.go 2023-11-18 21:25:07 -05:00
Bruce MacDonald
43a726149d fix potentially inaccurate error message 2023-11-18 21:25:07 -05:00
Jeffrey Morgan
984714f131 update status text when transfering blob on ollama create 2023-11-18 09:40:10 -05:00
Jeffrey Morgan
bab9494176 add - separator to temp file created on ollama create 2023-11-18 09:39:52 -05:00
Jeffrey Morgan
85e4441c6a cache docker builds 2023-11-18 08:51:38 -05:00
Michael Yang
42e43736a4 Merge pull request #1186 from jmorganca/mxyng/copy-blob
fix cross device rename
2023-11-17 21:54:53 -08:00
Michael Yang
c6e6c8ee7e fix cross device rename 2023-11-17 15:22:17 -08:00
Jeffrey Morgan
a185b29719 fix install script error on linux 2023-11-17 18:00:41 -05:00
Michael Yang
dc84b20d6b Merge pull request #1104 from jmorganca/mxyng/jupyter
add jupyter notebook example
2023-11-17 14:46:26 -08:00
Michael Yang
ad8659b980 Merge pull request #1161 from jmorganca/mxyng/systemd-placeholder
placeholder environment variables
2023-11-17 14:45:38 -08:00
Michael Yang
c1bbf5ddee Merge pull request #1134 from jmorganca/mxyng/progress
progress bar
2023-11-17 14:03:35 -08:00
Bruce MacDonald
0b19e24d81 only retry once on auth failure (#1175) 2023-11-17 14:22:35 -05:00
Michael Yang
3cb07d2773 simplify StopAndClear 2023-11-17 10:26:22 -08:00
Michael Yang
976068369b stop all spinners on progress stop 2023-11-17 10:06:19 -08:00
Michael Yang
4d677ee389 no divide by zero 2023-11-17 10:06:19 -08:00
Michael Yang
7ea905871a only move cursor up if pos > 0 2023-11-17 10:06:19 -08:00
Michael Yang
d6ecaa2cbf update progress responses 2023-11-17 10:06:19 -08:00
Michael Yang
4dcf7a59b1 generate progress 2023-11-17 10:06:19 -08:00
Michael Yang
1c0e092ead progress cmd 2023-11-17 10:06:19 -08:00
Michael Yang
c4a3ccd7ac progress 2023-11-17 10:06:19 -08:00
Michael Yang
9f04e5a8ea format bytes 2023-11-17 10:06:19 -08:00
Michael Yang
f91bb2f7f0 remove progressbar 2023-11-17 10:06:19 -08:00
Michael Yang
0813387414 Merge pull request #1177 from jmorganca/mxyng/faq
faq: fix heading and add more details
2023-11-17 10:05:21 -08:00
Michael Yang
4936b5bb37 add jupyter readme 2023-11-17 10:04:52 -08:00
Michael Yang
f7f6d6c693 Update examples/jupyter-notebook/ollama.ipynb
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-17 09:30:07 -08:00
Michael Yang
a3053b66d2 add jupyter notebook example 2023-11-17 09:30:07 -08:00
Michael Yang
c82ead4d01 faq: fix heading and add more details 2023-11-17 09:02:17 -08:00
Michael Yang
90860b6a7e update faq (#1176) 2023-11-17 11:42:58 -05:00
Jeffrey Morgan
81092147c4 remove unnecessary -X POST from example curl commands 2023-11-17 09:50:38 -05:00
Jeffrey Morgan
92656a74b7 Use llama2 as the model in api.md 2023-11-17 07:17:51 -05:00
Jeffrey Morgan
41434a7cdc build intel mac with correct binary and compile flags 2023-11-16 22:14:51 -05:00
Michael Yang
71687ab809 Merge pull request #1164 from jmorganca/mxyng/faq
update faq
2023-11-16 17:20:18 -08:00
Michael Yang
d8842b4d4b update faq 2023-11-16 17:07:36 -08:00
Michael Yang
32add8577d placeholder environment variables 2023-11-16 16:57:39 -08:00
Michael Yang
585f9c01fa Merge pull request #1160 from jmorganca/mxyng/faq
update faq
2023-11-16 16:48:51 -08:00
Michael Yang
c13bde962d Update docs/faq.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-16 16:48:38 -08:00
Michael Yang
ee307937fd update faq 2023-11-16 16:46:43 -08:00
Matt Williams
ab6639bc47 Merge pull request #1074 from jmorganca/mattw/loganalysisexample
Log Analysis Example
2023-11-16 16:33:07 -08:00
Jeffrey Morgan
dbe6e77472 Update README.md 2023-11-16 16:46:38 -05:00
Bruce MacDonald
4b3f4bc7d9 return failure details when unauthorized to push (#1131)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-16 16:44:18 -05:00
Michael Yang
a5ccf742c1 fix cross repo mounts 2023-11-16 16:33:30 -05:00
Michael Yang
e33ef391cd fix push scope error for inherited model 2023-11-16 16:33:30 -05:00
yanndegat
75295b9528 install: fix enable contrib on debian 12 (#1151)
On debian 12, sources definitions have moved from
/etc/apt/sources.list to /etc/apt/sources.list.d/debian.sources
2023-11-16 15:53:06 -05:00
Matt Williams
db5ef3004c Merge pull request #1079 from jmorganca/mattw/jsonexample
Add example using JSON format output
2023-11-16 09:13:34 -08:00
Michael Yang
b5f158f046 add faq for proxies (#1147) 2023-11-16 11:43:37 -05:00
Piero Savastano
30141b42e9 Add Cheshire Cat to community integrations (#1124) 2023-11-16 11:30:54 -05:00
Dane Madsen
5f301ece1d Add Maid to Community Integrations (#1120) 2023-11-16 11:27:53 -05:00
Michael Yang
77954bea0e Merge pull request #898 from jmorganca/mxyng/build-context
create remote models
2023-11-15 16:41:12 -08:00
Michael Yang
54f92f01cb update docs 2023-11-15 15:28:15 -08:00
Michael
30ae6e731e Update randomaddresses.py 2023-11-15 18:24:50 -05:00
Michael
b28a30f7ba Update examples/python-json-datagenerator/predefinedschema.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-15 18:23:36 -05:00
Jeffrey Morgan
ecd71347ab Update faq.md 2023-11-15 18:17:13 -05:00
Jeffrey Morgan
8ee4cbea0f Remove table of contents in faq.md 2023-11-15 18:16:27 -05:00
Michael Yang
652d90e1c7 Update server/images.go
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-15 15:16:23 -08:00
Michael Yang
bc22d5a38b no blob response 2023-11-15 15:16:23 -08:00
Michael Yang
71d71d0988 update docs 2023-11-15 15:16:23 -08:00
Michael Yang
1901044b07 use checksum reference 2023-11-15 15:16:23 -08:00
Michael Yang
d660eebf22 fix create from model tag 2023-11-15 15:16:23 -08:00
Michael Yang
cac11c9137 update api docs 2023-11-15 15:16:23 -08:00
Michael Yang
a07c935d34 ignore non blobs 2023-11-15 15:16:23 -08:00
Michael Yang
1552cee59f client create modelfile 2023-11-15 15:16:23 -08:00
Michael Yang
3ca56b5ada add create modelfile field 2023-11-15 15:16:23 -08:00
Michael Yang
b0d14ed51c refactor create model 2023-11-15 15:16:23 -08:00
Matt Williams
f61f340279 FAQ: answer a few faq questions (#1128)
* faq: does ollama share my prompts

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: ollama and openai

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: vscode plugins

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: send a doc to Ollama

Signed-off-by: Matt Williams <m@technovangelist.com>

* extra spacing

Signed-off-by: Matt Williams <m@technovangelist.com>

* Update faq.md

* Update faq.md

---------

Signed-off-by: Matt Williams <m@technovangelist.com>
Co-authored-by: Michael <mchiang0610@users.noreply.github.com>
2023-11-15 18:05:13 -05:00
Matt Williams
47ffb81db7 Update examples/python-json-datagenerator/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:34 -08:00
Matt Williams
69795d2db0 Update examples/python-json-datagenerator/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:16 -08:00
Matt Williams
acde0819d9 Update examples/python-json-datagenerator/randomaddresses.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:02 -08:00
Matt Williams
f748331aa3 Update examples/python-json-datagenerator/predefinedschema.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:32:45 -08:00
Matt Williams
f4edc302a8 Update examples/python-loganalysis/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:31:22 -08:00
Matt Williams
64b7e0c218 Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:31:05 -08:00
Matt Williams
eced0d52ab Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:30:30 -08:00
Matt Williams
96bf9cafa7 Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:30:17 -08:00
Matt Williams
b6817a83d8 Add gif and finish readme
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 16:41:48 -06:00
Matt Williams
73f3448ede add example showing use of JSON format
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 16:33:56 -06:00
Matt Williams
e4f59ba073 better streaming plus gif
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 08:55:17 -06:00
Matt Williams
5de568bffe Add a simple log analysis example
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 08:28:52 -06:00
43 changed files with 1453 additions and 1775 deletions

View File

@@ -6,3 +6,4 @@ scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env
.cache

.gitignore
View File

@@ -6,3 +6,4 @@
dist
ollama
ggml-metal.metal
.cache

View File

@@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models.
For example, to generate text from a model:
```
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt":"Why is the sky blue?"
}'
@@ -225,6 +225,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Web UI](https://github.com/ollama-webui/ollama-webui)
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
### Terminal
@@ -233,11 +234,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -246,6 +249,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
### Mobile
- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution)
### Extensions & Plugins
@@ -257,3 +265,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)

View File

@@ -5,6 +5,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
@@ -95,11 +96,19 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
var reqBody io.Reader
var data []byte
var err error
if reqData != nil {
switch reqData := reqData.(type) {
case io.Reader:
// reqData is already an io.Reader
reqBody = reqData
case nil:
// noop
default:
data, err = json.Marshal(reqData)
if err != nil {
return err
}
reqBody = bytes.NewReader(data)
}
@@ -287,3 +296,18 @@ func (c *Client) Heartbeat(ctx context.Context) error {
}
return nil
}
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
var statusError StatusError
if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
return err
}
if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
return err
}
}
return nil
}

View File

@@ -99,9 +99,10 @@ type EmbeddingResponse struct {
}
type CreateRequest struct {
Name string `json:"name"`
Path string `json:"path"`
Stream *bool `json:"stream,omitempty"`
Name string `json:"name"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
}
type DeleteRequest struct {

View File

@@ -1,9 +1,11 @@
package cmd
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/pem"
"errors"
"fmt"
@@ -27,7 +29,8 @@ import (
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/progressbar"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/progress"
"github.com/jmorganca/ollama/readline"
"github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version"
@@ -45,49 +48,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
var spinner *Spinner
p := progress.NewProgress(os.Stderr)
defer p.Stop()
var currentDigest string
var bar *progressbar.ProgressBar
bars := make(map[string]*progress.Bar)
modelfile, err := os.ReadFile(filename)
if err != nil {
return err
}
commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil {
return err
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for _, c := range commands {
switch c.Name {
case "model", "adapter":
path := c.Args
if path == "~" {
path = home
} else if strings.HasPrefix(path, "~/") {
path = filepath.Join(home, path[2:])
}
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
bin, err := os.Open(path)
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue
} else if err != nil {
return err
}
defer bin.Close()
hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil {
return err
}
bin.Seek(0, io.SeekStart)
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return err
}
modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
}
}
request := api.CreateRequest{Name: args[0], Path: filename}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
if spinner != nil {
spinner.Stop()
if resp.Digest != "" {
spinner.Stop()
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
currentDigest = resp.Digest
// pulling
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
spinner.Stop()
}
spinner = NewSpinner(resp.Status)
go spinner.Spin(100 * time.Millisecond)
bar.Set(resp.Completed)
} else if status != resp.Status {
spinner.Stop()
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(context.Background(), &request, fn); err != nil {
return err
}
if spinner != nil {
spinner.Stop()
if spinner.description != "success" {
return errors.New("unexpected end to create model")
}
}
return nil
}
@@ -124,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PushRequest{Name: args[0], Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to push model")
}
spinner.Stop()
return nil
}
@@ -304,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
return err
}
return pull(args[0], insecure)
}
func pull(model string, insecure bool) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PullRequest{Name: model, Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to pull model")
}
return nil
}
@@ -396,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
return err
}
spinner := NewSpinner("")
go spinner.Spin(60 * time.Millisecond)
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse
@@ -429,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
}
p.StopAndClear()
latest = response
@@ -465,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err

View File

@@ -1,44 +0,0 @@
package cmd
import (
"fmt"
"os"
"time"
"github.com/jmorganca/ollama/progressbar"
)
type Spinner struct {
description string
*progressbar.ProgressBar
}
func NewSpinner(description string) *Spinner {
return &Spinner{
description: description,
ProgressBar: progressbar.NewOptions(-1,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionThrottle(60*time.Millisecond),
progressbar.OptionSpinnerType(14),
progressbar.OptionSetRenderBlankState(true),
progressbar.OptionSetElapsedTime(false),
progressbar.OptionClearOnFinish(),
progressbar.OptionSetDescription(description),
),
}
}
func (s *Spinner) Spin(tick time.Duration) {
for range time.Tick(tick) {
if s.IsFinished() {
break
}
s.Add(1)
}
}
func (s *Spinner) Stop() {
s.Finish()
fmt.Println(s.description)
}

View File

@@ -51,14 +51,16 @@ Advanced parameters (optional):
### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?"
}'
@@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s),
#### Request (No streaming)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false
}'
@@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
@@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]",
"raw": true,
@@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
#### Request (JSON mode)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
@@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to:
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false,
"options": {
@@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
@@ -292,22 +294,23 @@ curl -X POST http://localhost:11434/api/generate -d '{
POST /api/create
```
Create a model from a [`Modelfile`](./modelfile.md)
Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation must also explicitly create any file blobs referenced by fields such as `FROM` and `ADAPTER` using [Create a Blob](#create-a-blob), and set the field value to the path indicated in the response.
### Parameters
- `name`: name of the model to create
- `path`: path to the Modelfile
- `modelfile`: contents of the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `path` (deprecated): path to the Modelfile
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/create -d '{
curl http://localhost:11434/api/create -d '{
"name": "mario",
"path": "~/Modelfile"
"modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
}'
```
@@ -321,6 +324,54 @@ A stream of JSON objects. When finished, `status` is `success`.
}
```
### Check if a Blob Exists
```shell
HEAD /api/blobs/:digest
```
Check if a blob is known to the server.
#### Query Parameters
- `digest`: the SHA256 digest of the blob
#### Examples
##### Request
```shell
curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Returns 200 OK if the blob exists, and 404 Not Found if it does not.
### Create a Blob
```shell
POST /api/blobs/:digest
```
Create a blob from a file. Returns the server file path.
#### Query Parameters
- `digest`: the expected SHA256 digest of the file
#### Examples
##### Request
```shell
curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Returns 201 Created if the blob was successfully created.
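Putting the pieces together, the remote create flow is: hash the local weights, ensure the blob exists on the server, then send the Modelfile with the local path replaced by a blob reference (the CLI change in this compare rewrites paths to `@<digest>`). A minimal sketch, assuming a local `model.bin`, the illustrative model name `mario`, and that GNU `sha256sum` is available:

```shell
# Compute the SHA256 digest of the local weights file (illustrative filename).
DIGEST="sha256:$(sha256sum model.bin | cut -d ' ' -f 1)"

# Upload the blob only if the server does not already have it (-f makes curl fail on 404).
curl -sfI "http://localhost:11434/api/blobs/$DIGEST" > /dev/null || \
  curl -T model.bin -X POST "http://localhost:11434/api/blobs/$DIGEST"

# Create the model, referencing the uploaded blob instead of a local path.
curl http://localhost:11434/api/create -d "{
  \"name\": \"mario\",
  \"modelfile\": \"FROM @$DIGEST\\nSYSTEM You are mario from Super Mario Bros.\"
}"
```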
## List Local Models
```shell
@@ -345,7 +396,7 @@ A single JSON object will be returned.
{
"models": [
{
"name": "llama2:7b",
"name": "llama2",
"modified_at": "2023-08-02T17:02:23.713454393-07:00",
"size": 3791730596
},
@@ -376,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a
```shell
curl http://localhost:11434/api/show -d '{
"name": "llama2:7b"
"name": "llama2"
}'
```
@@ -385,9 +436,9 @@ curl http://localhost:11434/api/show -d '{
```json
{
"license": "<contents of license block>",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] <<SYS>>{{ .System }}<</SYS>>\n\n{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
"template": "[INST] <<SYS>>{{ .System }}<</SYS>>\n\n{{ .Prompt }} [/INST] "
"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
}
```
@@ -405,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model.
```shell
curl http://localhost:11434/api/copy -d '{
"source": "llama2:7b",
"source": "llama2",
"destination": "llama2-backup"
}'
```
@@ -459,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
#### Request
```shell
curl -X POST http://localhost:11434/api/pull -d '{
"name": "llama2:7b"
curl http://localhost:11434/api/pull -d '{
"name": "llama2"
}'
```
@@ -531,7 +582,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
#### Request
```shell
curl -X POST http://localhost:11434/api/push -d '{
curl http://localhost:11434/api/push -d '{
"name": "mattw/pygmalion:latest"
}'
```
@@ -599,8 +650,8 @@ Advanced parameters:
#### Request
```shell
curl -X POST http://localhost:11434/api/embeddings -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/embeddings -d '{
"model": "llama2",
"prompt": "Here is an article about llamas..."
}'
```

View File

@@ -32,11 +32,11 @@ Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST`
```bash
mkdir -p /etc/systemd/system/ollama.service.d
echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf
echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf
```
```bash
echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf
echo 'Environment="OLLAMA_HOST=0.0.0.0:11434"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
@@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
On Linux:
```bash
echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf
echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
@@ -74,8 +74,6 @@ systemctl restart ollama
- macOS: Raw model data is stored under `~/.ollama/models`.
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
Below the models directory you will find a structure similar to the following:
```shell
@@ -96,3 +94,63 @@ The manifest lists all the layers used in this model. You will see a `media type
### How can I change where Ollama stores models?
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
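For example, a minimal sketch on Linux, assuming `/data/ollama/models` is an illustrative directory writable by the `ollama` user:

```bash
mkdir -p /etc/systemd/system/ollama.service.d
echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf
echo 'Environment="OLLAMA_MODELS=/data/ollama/models"' >>/etc/systemd/system/ollama.service.d/environment.conf
systemctl daemon-reload
systemctl restart ollama
```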
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
No. Anything you do with Ollama, such as generating a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data.
## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. You can see the list of [extensions & plugins](https://github.com/jmorganca/ollama#extensions--plugins) at the bottom of the main repository readme.
## How do I use Ollama behind a proxy?
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` is configured. When using either variable, ensure it is set where `ollama serve` can access the value.
When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate.
On macOS:
```bash
HTTPS_PROXY=http://proxy.example.com ollama serve
```
On Linux:
```bash
echo 'Environment="HTTPS_PROXY=https://proxy.example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
### How do I use Ollama behind a proxy in Docker?
The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
Alternatively, the Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and for the Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
```dockerfile
FROM ollama/ollama
COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
RUN update-ca-certificates
```
Build and run this image:
```shell
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
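A minimal sketch of starting the container with GPUs attached, assuming the toolkit is already installed (the volume and container names are illustrative and follow the Docker Hub instructions):

```shell
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
```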
GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.

View File

@@ -41,6 +41,8 @@ INSTRUCTION arguments
## Examples
### Basic `Modelfile`
An example of a `Modelfile` creating a mario blueprint:
```modelfile
@@ -63,6 +65,35 @@ To use this:
More examples are available in the [examples directory](../examples).
### `Modelfile`s in [ollama.ai/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` like so:
```bash
> ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama2:13b
FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
{{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
PARAMETER stop [INST]
PARAMETER stop [/INST]
PARAMETER stop <<SYS>>
PARAMETER stop <</SYS>>
```
## Instructions
### FROM (Required)
@@ -129,11 +160,14 @@ PARAMETER <parameter> <parametervalue>
| --------------- | ------------------------------------------------------------------------------------------------------------ |
| `{{ .System }}` | The system prompt used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```modelfile
TEMPLATE """
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
{{ .Prompt }}
@@ -174,3 +208,5 @@ LICENSE """
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make them easier to distinguish from arguments.
- Instructions can be in any order. In the examples, we start with the FROM instruction to keep it easily readable.
[1]: https://ollama.ai/library

View File

@@ -0,0 +1,5 @@
# Ollama Jupyter Notebook
This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It starts the Ollama service and exposes an endpoint using `ngrok`, which can be used to communicate with the Ollama instance remotely.
For best results, use an instance with a GPU accelerator.

View File

@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "93f59dcb-c588-41b8-a792-55d88ade739c",
"metadata": {},
"outputs": [],
"source": [
"# Download and run the Ollama Linux install script\n",
"!curl https://ollama.ai/install.sh | sh\n",
"!command -v systemctl >/dev/null && sudo systemctl stop ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658c147e-c7f8-490e-910e-62b80f577dda",
"metadata": {},
"outputs": [],
"source": [
"!pip install aiohttp pyngrok\n",
"\n",
"import os\n",
"import asyncio\n",
"from aiohttp import ClientSession\n",
"\n",
"# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
"# over the built-in library. This is particularly important for \n",
"# Google Colab which installs older drivers\n",
"os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
"\n",
"async def run(cmd):\n",
" '''\n",
" run is a helper function to run subcommands asynchronously.\n",
" '''\n",
" print('>>> starting', *cmd)\n",
" p = await asyncio.subprocess.create_subprocess_exec(\n",
" *cmd,\n",
" stdout=asyncio.subprocess.PIPE,\n",
" stderr=asyncio.subprocess.PIPE,\n",
" )\n",
"\n",
" async def pipe(lines):\n",
" async for line in lines:\n",
" print(line.strip().decode('utf-8'))\n",
"\n",
" await asyncio.gather(\n",
" pipe(p.stdout),\n",
" pipe(p.stderr),\n",
" )\n",
"\n",
"\n",
"await asyncio.gather(\n",
" run(['ollama', 'serve']),\n",
" run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
"metadata": {},
"source": [
"The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
"\n",
"```\n",
"t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
"```\n",
"\n",
"The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
"\n",
"```bash\n",
"export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
"ollama list\n",
"ollama run mistral\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -3,8 +3,10 @@
FROM orca
TEMPLATE """
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
I hate it when my phone dies
### Response:

View File

@@ -3,8 +3,10 @@
This is a simple sentiment analyzer using the Orca model. When you pull Orca from the registry, it has a Template already defined that looks like this:
```Modelfile
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
{{ .Prompt }}

View File

@@ -0,0 +1,31 @@
import requests
import json
import random
model = "llama2"
template = {
"firstName": "",
"lastName": "",
"address": {
"street": "",
"city": "",
"state": "",
"zipCode": ""
},
"phoneNumber": ""
}
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View File

@@ -0,0 +1,31 @@
import requests
import json
import random
countries = [
"United States",
"United Kingdom",
"the Netherlands",
"Germany",
"Mexico",
"Canada",
"France",
]
country = random.choice(countries)
model = "llama2"
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user in {country}")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View File

@@ -0,0 +1,34 @@
# JSON Output Example
![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25)
There are two Python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in.
## Review the Code
Both programs are essentially the same, with a different prompt for each, demonstrating two different ideas. The key to getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and to specify `format` as `json` in the data body.
```python
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
```
When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country.
In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with.
Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))
```

View File

@@ -0,0 +1 @@
Requests==2.31.0

View File

@@ -0,0 +1,8 @@
FROM codebooga:latest
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can take to resolve them, list the steps in your answer.
"""
PARAMETER TEMPERATURE 0.3

View File

@@ -0,0 +1,42 @@
import sys
import re
import requests
import json
# prelines and postlines represent the number of lines of context to include in the output around the error
prelines = 10
postlines = 10
def find_errors_in_log_file():
if len(sys.argv) < 2:
print("Usage: python loganalysis.py <filename>")
return
log_file_path = sys.argv[1]
with open(log_file_path, 'r') as log_file:
log_lines = log_file.readlines()
error_logs = []
for i, line in enumerate(log_lines):
if "error" in line.lower():
start_index = max(0, i - prelines)
end_index = min(len(log_lines), i + postlines + 1)
error_logs.extend(log_lines[start_index:end_index])
return error_logs
error_logs = find_errors_in_log_file()
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='', flush=True)

View File

@@ -0,0 +1,32 @@
2023-11-10 07:17:40 /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration
2023-11-10 07:17:40 /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 /docker-entrypoint.sh: Sourcing /docker-entrypoint.d/15-local-resolvers.envsh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Configuration complete; ready for start up
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: using the "epoll" event method
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: nginx/1.25.3
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: built by gcc 12.2.0 (Debian 12.2.0-14)
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: OS: Linux 6.4.16-linuxkit
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker processes
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 29
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 30
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 31
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 32
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 33
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 34
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 35
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 36
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 37
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 38
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:43 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:44 2023/11/10 13:17:44 [error] 29#29: *1 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:8080", referrer: "http://localhost:8080/"
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:44 +0000] "GET /favicon.ico HTTP/1.1" 404 555 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:50 2023/11/10 13:17:50 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:17:50 192.168.65.1 - - [10/Nov/2023:13:17:50 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:18:53 2023/11/10 13:18:53 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:18:53 192.168.65.1 - - [10/Nov/2023:13:18:53 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"

View File

@@ -0,0 +1,48 @@
# Log Analysis example
![loganalyzer 2023-11-10 08_53_29](https://github.com/jmorganca/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921)
This example shows one possible way to create a log file analyzer. To use it, run:
`python loganalysis.py <logfile>`
You can try this with the `logtest.logfile` file included in this directory.
## Review the code
The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt:
```plaintext
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can take to resolve them, list the steps in your answer.
"""
```
This model is available at https://ollama.ai/mattw/loganalyzer. You can customize it and add it to your own namespace using `ollama create <namespace/modelname> -f <path-to-modelfile>` followed by `ollama push <namespace/modelname>`.
Then `loganalysis.py` scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after it are set as the prompt for a call to the Generate API.
```python
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
```
Finally, the streamed output is parsed and the `response` field from each chunk is printed to the terminal.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='')
```
## Next Steps
There is a lot more that can be done here. This is a simple way to detect errors: looking for the word 'error'. You could go further and look for anomalous activity in the logs, create embeddings for each line and compare them to find similar lines, or apply a Levenshtein distance algorithm to surface lines that stand out.
Also try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to help understand newer log formats.

View File

@@ -0,0 +1 @@
Requests==2.31.0

View File

@@ -1,23 +1,45 @@
package format
import "fmt"
import (
"fmt"
"math"
)
const (
Byte = 1
KiloByte = Byte * 1000
MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000
TeraByte = GigaByte * 1000
)
func HumanBytes(b int64) string {
var value float64
var unit string
switch {
case b > GigaByte:
return fmt.Sprintf("%.1f GB", float64(b)/GigaByte)
case b > MegaByte:
return fmt.Sprintf("%.1f MB", float64(b)/MegaByte)
case b > KiloByte:
return fmt.Sprintf("%.1f KB", float64(b)/KiloByte)
case b >= TeraByte:
value = float64(b) / TeraByte
unit = "TB"
case b >= GigaByte:
value = float64(b) / GigaByte
unit = "GB"
case b >= MegaByte:
value = float64(b) / MegaByte
unit = "MB"
case b >= KiloByte:
value = float64(b) / KiloByte
unit = "KB"
default:
return fmt.Sprintf("%d B", b)
}
switch {
case value >= 100:
return fmt.Sprintf("%d %s", int(value), unit)
case value != math.Trunc(value):
return fmt.Sprintf("%.1f %s", value, unit)
default:
return fmt.Sprintf("%d %s", int(value), unit)
}
}

View File

@@ -7,13 +7,13 @@ package llm
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -71,9 +71,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
// IMPORTANT: the order of the runners in the array is the priority order
switch runtime.GOOS {
case "darwin":
runners = []ModelRunner{
{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")},
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
if runtime.GOARCH == "arm64" {
runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
} else {
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
}
case "linux":
runners = []ModelRunner{
@@ -225,7 +226,7 @@ type llama struct {
}
var (
errNvidiaSMI = errors.New("nvidia-smi command failed")
errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
)
@@ -342,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--embedding",
}
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
}
@@ -543,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stream": true,
"n_predict": llm.NumPredict,
"n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature,
"top_k": llm.TopK,
"top_p": llm.TopP,

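For context, a hedged sketch of how a caller exercises the new `main_gpu` plumbing end to end; the model name, prompt, and local server address are assumptions rather than part of this diff:

```go
package main

import (
	"bytes"
	"encoding/json"
	"log"
	"net/http"
)

func main() {
	// main_gpu is only forwarded to llama.cpp when it is greater than zero,
	// both as the --main-gpu flag at runner startup and as a predict option.
	body, err := json.Marshal(map[string]any{
		"model":  "llama2",
		"prompt": "why is the sky blue?",
		"options": map[string]any{
			"main_gpu": 1,
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
}
```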
View File

@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
if runtime.GOOS == "darwin" {
switch ggml.FileType() {
case "Q8_0":
case "F32", "Q5_0", "Q5_1", "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
}
var requiredMemory int64

14
main.go
View File

@@ -2,11 +2,25 @@ package main
import (
"context"
"fmt"
"os"
"os/signal"
"syscall"
"github.com/jmorganca/ollama/cmd"
"github.com/spf13/cobra"
)
func main() {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
fmt.Print("\033[?25h")
os.Exit(0)
}()
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
}
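The signal handler above, and the progress package introduced below, write raw ANSI escape sequences inline. As a reading aid, a sketch with hypothetical constant names for the sequences that appear in these files:

```go
package main

import "fmt"

// Hypothetical constant names, grouped here only to document the escape
// sequences that the new code writes inline.
const (
	cursorHide = "\033[?25l" // hide the cursor while progress is rendering
	cursorShow = "\033[?25h" // show the cursor again (also printed on SIGINT)
	clearLine  = "\033[2K"   // erase the current line before redrawing it
	cursorCol1 = "\033[1G"   // move the cursor back to column 1
	cursorUp   = "\033[A"    // move up one line to redraw multi-line output
)

func main() {
	// Hide and immediately restore the cursor, as the SIGINT handler above does.
	fmt.Print(cursorHide, cursorShow)
}
```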

155
progress/bar.go Normal file
View File

@@ -0,0 +1,155 @@
package progress
import (
"fmt"
"math"
"os"
"strings"
"time"
"github.com/jmorganca/ollama/format"
"golang.org/x/term"
)
type Stats struct {
rate int64
value int64
remaining time.Duration
}
type Bar struct {
message string
messageWidth int
maxValue int64
initialValue int64
currentValue int64
started time.Time
stats Stats
statted time.Time
}
func NewBar(message string, maxValue, initialValue int64) *Bar {
return &Bar{
message: message,
messageWidth: -1,
maxValue: maxValue,
initialValue: initialValue,
currentValue: initialValue,
started: time.Now(),
}
}
func (b *Bar) String() string {
termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termWidth = 80
}
var pre, mid, suf strings.Builder
if b.message != "" {
message := strings.TrimSpace(b.message)
if b.messageWidth > 0 && len(message) > b.messageWidth {
message = message[:b.messageWidth]
}
fmt.Fprintf(&pre, "%s", message)
if b.messageWidth-pre.Len() >= 0 {
pre.WriteString(strings.Repeat(" ", b.messageWidth-pre.Len()))
}
pre.WriteString(" ")
}
fmt.Fprintf(&pre, "%3.0f%% ", math.Floor(b.percent()))
fmt.Fprintf(&suf, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))
stats := b.Stats()
rate := int64(stats.rate)
if rate > 0 {
fmt.Fprintf(&suf, ", %s/s", format.HumanBytes(rate))
}
fmt.Fprintf(&suf, ")")
elapsed := time.Since(b.started)
if b.percent() < 100 && rate > 0 {
fmt.Fprintf(&suf, " [%s:%s]", elapsed.Round(time.Second), stats.remaining)
} else {
fmt.Fprintf(&suf, " ")
}
mid.WriteString("▕")
// add 3 extra spaces: 2 boundary characters and 1 space at the end
f := termWidth - pre.Len() - suf.Len() - 3
n := int(float64(f) * b.percent() / 100)
if n > 0 {
mid.WriteString(strings.Repeat("█", n))
}
if f-n > 0 {
mid.WriteString(strings.Repeat(" ", f-n))
}
mid.WriteString("▏")
return pre.String() + mid.String() + suf.String()
}
func (b *Bar) Set(value int64) {
if value >= b.maxValue {
value = b.maxValue
}
b.currentValue = value
}
func (b *Bar) percent() float64 {
if b.maxValue > 0 {
return float64(b.currentValue) / float64(b.maxValue) * 100
}
return 0
}
func (b *Bar) Stats() Stats {
if time.Since(b.statted) < time.Second {
return b.stats
}
switch {
case b.statted.IsZero():
b.stats = Stats{
value: b.initialValue,
rate: 0,
remaining: 0,
}
case b.currentValue >= b.maxValue:
b.stats = Stats{
value: b.maxValue,
rate: 0,
remaining: 0,
}
default:
rate := b.currentValue - b.stats.value
var remaining time.Duration
if rate > 0 {
remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate)))
}
b.stats = Stats{
value: b.currentValue,
rate: rate,
remaining: remaining,
}
}
b.statted = time.Now()
return b.stats
}
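Worked numbers for the rate and ETA update above, as a sketch with assumed byte counts; `Stats` resamples at most once per second, so the change in `value` approximates bytes per second:

```go
package main

import (
	"fmt"
	"time"

	"github.com/jmorganca/ollama/format"
)

func main() {
	const (
		maxValue     = int64(100 * 1000 * 1000) // 100 MB total
		currentValue = int64(40 * 1000 * 1000)  // 40 MB done at this sample
		lastValue    = int64(32 * 1000 * 1000)  // value at the previous sample
	)

	// Mirrors the default branch of Stats(): rate is the change since the
	// last ~1s sample, remaining is the outstanding bytes divided by it.
	rate := currentValue - lastValue
	remaining := time.Second * time.Duration(float64(maxValue-currentValue)/float64(rate))

	fmt.Printf("%s/s, %s remaining\n", format.HumanBytes(rate), remaining)
	// prints: 8 MB/s, 7s remaining
}
```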

113
progress/progress.go Normal file
View File

@@ -0,0 +1,113 @@
package progress
import (
"fmt"
"io"
"sync"
"time"
)
type State interface {
String() string
}
type Progress struct {
mu sync.Mutex
w io.Writer
pos int
ticker *time.Ticker
states []State
}
func NewProgress(w io.Writer) *Progress {
p := &Progress{w: w}
go p.start()
return p
}
func (p *Progress) stop() bool {
for _, state := range p.states {
if spinner, ok := state.(*Spinner); ok {
spinner.Stop()
}
}
if p.ticker != nil {
p.ticker.Stop()
p.ticker = nil
p.render()
return true
}
return false
}
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
}
return stopped
}
func (p *Progress) StopAndClear() bool {
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
}
return stopped
}
func (p *Progress) Add(key string, state State) {
p.mu.Lock()
defer p.mu.Unlock()
p.states = append(p.states, state)
}
func (p *Progress) render() error {
p.mu.Lock()
defer p.mu.Unlock()
// clear already rendered progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String())
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}
}
p.pos = len(p.states)
return nil
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprintln(p.w, "\033[?25h")
for range p.ticker.C {
p.render()
}
}

72
progress/spinner.go Normal file
View File

@@ -0,0 +1,72 @@
package progress
import (
"fmt"
"strings"
"time"
)
type Spinner struct {
message string
messageWidth int
value int
ticker *time.Ticker
started time.Time
stopped time.Time
}
func NewSpinner(message string) *Spinner {
s := &Spinner{
message: message,
started: time.Now(),
value: 231,
}
go s.start()
return s
}
func (s *Spinner) String() string {
var sb strings.Builder
if len(s.message) > 0 {
message := strings.TrimSpace(s.message)
if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth]
}
fmt.Fprintf(&sb, "%s", message)
if s.messageWidth-sb.Len() >= 0 {
sb.WriteString(strings.Repeat(" ", s.messageWidth-sb.Len()))
}
sb.WriteString(" ")
}
if s.stopped.IsZero() {
sb.WriteString(fmt.Sprintf("\033[48;5;%dm ", s.value))
sb.WriteString("\033[0m")
}
return sb.String()
}
func (s *Spinner) start() {
s.ticker = time.NewTicker(40 * time.Millisecond)
for range s.ticker.C {
if s.value < 255 {
s.value++
} else {
s.value = 231
}
if !s.stopped.IsZero() {
return
}
}
}
func (s *Spinner) Stop() {
if s.stopped.IsZero() {
s.stopped = time.Now()
}
}
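A hedged end-to-end sketch of how the three new types compose; the writer, messages, sizes, and sleep intervals are illustrative rather than taken from the diff:

```go
package main

import (
	"os"
	"time"

	"github.com/jmorganca/ollama/progress"
)

func main() {
	p := progress.NewProgress(os.Stderr)
	defer p.Stop() // stops rendering and prints a final newline

	// A spinner renders a pulsating block until it is stopped.
	spinner := progress.NewSpinner("connecting to registry")
	p.Add("connect", spinner)
	time.Sleep(500 * time.Millisecond)
	spinner.Stop()

	// A bar renders percent, human-readable bytes, transfer rate, and ETA.
	total := int64(10 * 1000 * 1000)
	bar := progress.NewBar("pulling layer", total, 0)
	p.Add("pull", bar)
	for done := int64(0); done <= total; done += total / 10 {
		bar.Set(done)
		time.Sleep(200 * time.Millisecond)
	}
}
```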

View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2017 Zack
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,121 +0,0 @@
# progressbar
[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar)
[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3)
A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
## Install
```
go get -u github.com/schollz/progressbar/v3
```
## Usage
### Basic usage
```golang
bar := progressbar.Default(100)
for i := 0; i < 100; i++ {
bar.Add(1)
time.Sleep(40 * time.Millisecond)
}
```
which looks like:
![Example of basic bar](examples/basic/basic.gif)
### I/O operations
The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, which means you can also use it as a progress bar for an `io.Reader`.
```golang
req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
defer f.Close()
bar := progressbar.DefaultBytes(
resp.ContentLength,
"downloading",
)
io.Copy(io.MultiWriter(f, bar), resp.Body)
```
which looks like:
![Example of download bar](examples/download/download.gif)
### Progress bar with unknown length
A progressbar with unknown length is a spinner. Any bar created with a length of -1 is automatically converted to a spinner with a customizable spinner type. For example, the code above can be run with `resp.ContentLength` set to `-1`.
which looks like:
![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
### Customization
There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
```golang
bar := progressbar.NewOptions(1000,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionShowBytes(true),
progressbar.OptionSetWidth(15),
progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))
for i := 0; i < 1000; i++ {
bar.Add(1)
time.Sleep(5 * time.Millisecond)
}
```
which looks like:
![Example of customized bar](examples/customization/customization.gif)
## Contributing
Pull requests are welcome. Feel free to...
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
## Thanks
Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
## License
MIT

View File

File diff suppressed because it is too large

View File

@@ -1,80 +0,0 @@
package progressbar
var spinners = map[int][]string{
0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
2: {"▖", "▘", "▝", "▗"},
3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
4: {"◢", "◣", "◤", "◥"},
5: {"◰", "◳", "◲", "◱"},
6: {"◴", "◷", "◶", "◵"},
7: {"◐", "◓", "◑", "◒"},
8: {".", "o", "O", "@", "*"},
9: {"|", "/", "-", "\\"},
10: {"◡◡", "⊙⊙", "◠◠"},
11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"},
13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
17: {"■", "□", "▪", "▫"},
18: {"←", "↑", "→", "↓"},
19: {"╫", "╪"},
20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"},
26: {".", "..", "..."},
27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
28: {".", "o", "O", "°", "O", "o", "."},
29: {"+", "x"},
30: {"v", "<", "^", ">"},
31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"},
32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"},
34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"},
37: {"", ""},
38: {"▌", "▀", "▐▄"},
39: {"🌍", "🌎", "🌏"},
40: {"◜", "◝", "◞", "◟"},
41: {"⬒", "⬔", "⬓", "⬕"},
42: {"⬖", "⬘", "⬗", "⬙"},
43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"},
44: {"♠", "♣", "♥", "♦"},
45: {"➞", "➟", "➠", "➡", "➠", "➟"},
46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "},
47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."},
48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "},
49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"},
52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"},
53: {"✶", "✸", "✹", "✺", "✹", "✷"},
54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"},
56: {"¿", "?"},
57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
59: {". ", ".. ", "...", " ..", " .", " "},
60: {".", "o", "O", "°", "O", "o", "."},
61: {"▓", "▒", "░"},
62: {"▌", "▀", "▐", "▄"},
63: {"⊶", "⊷"},
64: {"▪", "▫"},
65: {"□", "■"},
66: {"▮", "▯"},
67: {"-", "=", "≡"},
68: {"d", "q", "p", "b"},
69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
71: {"☗", "☖"},
72: {"⧇", "⧆"},
73: {"◉", "◎"},
74: {"㊂", "㊀", "㊁"},
75: {"⦾", "⦿"},
}

View File

@@ -10,6 +10,7 @@ mkdir -p dist
for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
rm -rf llm/llama.cpp/*/build
done
lipo -create -output dist/ollama dist/ollama-darwin-*

View File

@@ -10,6 +10,8 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \
-t ollama \
.

View File

@@ -181,6 +181,9 @@ install_cuda_driver_apt() {
debian)
status 'Enabling contrib sources...'
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
fi
;;
esac

View File

@@ -10,6 +10,7 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
-f Dockerfile \
-t ollama/ollama -t ollama/ollama:$VERSION \
.

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
@@ -53,8 +54,8 @@ type blobDownloadPart struct {
const (
numDownloadParts = 64
minDownloadPartSize int64 = 32 * 1000 * 1000
maxDownloadPartSize int64 = 256 * 1000 * 1000
minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte
)
func (p *blobDownloadPart) Name() string {
@@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
continue
}
i := i
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
@@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
// return immediately if the context is canceled or the device is out of space
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err)
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
default:
if try > 0 {
log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
}
return nil
}
}
@@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
}
fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", b.Digest),
Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
Digest: b.Digest,
Total: b.Total,
Completed: b.Completed.Load(),
@@ -304,7 +303,7 @@ type downloadOpts struct {
fn func(api.ProgressResponse)
}
const maxRetries = 3
const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded")
@@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
return err
default:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", opts.digest),
Status: fmt.Sprintf("pulling %s", opts.digest[7:19]),
Digest: opts.digest,
Total: fi.Size(),
Completed: fi.Size(),

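The retry loop above now backs off exponentially, and `maxRetries` rises from 3 to 6; a small sketch of the resulting schedule:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	const maxRetries = 6

	// Same formula as the download retry loop: sleep 2^try seconds per attempt,
	// i.e. 1s, 2s, 4s, 8s, 16s, 32s before giving up.
	for try := 0; try < maxRetries; try++ {
		sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
		fmt.Printf("attempt %d failed, retrying in %s\n", try, sleep)
	}
}
```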
View File

@@ -60,10 +60,12 @@ func (m *Model) Prompt(request api.GenerateRequest) (string, error) {
}
var vars struct {
First bool
System string
Prompt string
}
vars.First = len(request.Context) == 0
vars.System = m.System
vars.Prompt = request.Prompt
@@ -226,220 +228,181 @@ func GetModel(name string) (*Model, error) {
return model, nil
}
func filenameWithPath(path, f string) (string, error) {
// if filePath starts with ~/, replace it with the user's home directory.
if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) {
parts := strings.Split(f, string(os.PathSeparator))
home, err := os.UserHomeDir()
if err != nil {
return "", fmt.Errorf("failed to open file: %v", err)
}
f = filepath.Join(home, filepath.Join(parts[1:]...))
func realpath(p string) string {
abspath, err := filepath.Abs(p)
if err != nil {
return p
}
// if filePath is not an absolute path, make it relative to the modelfile path
if !filepath.IsAbs(f) {
f = filepath.Join(filepath.Dir(path), f)
home, err := os.UserHomeDir()
if err != nil {
return abspath
}
return f, nil
if p == "~" {
return home
} else if strings.HasPrefix(p, "~/") {
return filepath.Join(home, p[2:])
}
return abspath
}
func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
mp := ParseModelPath(name)
var manifest *ManifestV2
var err error
var noprune string
// build deleteMap to prune unused layers
deleteMap := make(map[string]bool)
if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
manifest, _, err = GetManifest(mp)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = true
}
deleteMap[manifest.Config.Digest] = true
}
}
mf, err := os.Open(path)
if err != nil {
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't open modelfile '%s'", path)})
return fmt.Errorf("failed to open file: %w", err)
}
defer mf.Close()
fn(api.ProgressResponse{Status: "parsing modelfile"})
commands, err := parser.Parse(mf)
if err != nil {
return err
}
func CreateModel(ctx context.Context, name string, commands []parser.Command, fn func(resp api.ProgressResponse)) error {
config := ConfigV2{
Architecture: "amd64",
OS: "linux",
Architecture: "amd64",
}
deleteMap := make(map[string]struct{})
var layers []*LayerReader
params := make(map[string][]string)
var sourceParams map[string]any
fromParams := make(map[string]any)
for _, c := range commands {
log.Printf("[%s] - %s\n", c.Name, c.Args)
log.Printf("[%s] - %s", c.Name, c.Args)
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
switch c.Name {
case "model":
fn(api.ProgressResponse{Status: "looking for model"})
if strings.HasPrefix(c.Args, "@") {
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
if err != nil {
return err
}
mp := ParseModelPath(c.Args)
mf, _, err := GetManifest(mp)
c.Args = blobPath
}
bin, err := os.Open(realpath(c.Args))
if err != nil {
modelFile, err := filenameWithPath(path, c.Args)
if err != nil {
// not a file on disk so must be a model reference
modelpath := ParseModelPath(c.Args)
manifest, _, err := GetManifest(modelpath)
switch {
case errors.Is(err, os.ErrNotExist):
fn(api.ProgressResponse{Status: "pulling model"})
if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil {
return err
}
manifest, _, err = GetManifest(modelpath)
if err != nil {
return err
}
case err != nil:
return err
}
if _, err := os.Stat(modelFile); err != nil {
// the model file does not exist, try pulling it
if errors.Is(err, os.ErrNotExist) {
fn(api.ProgressResponse{Status: "pulling model file"})
if err := PullModel(ctx, c.Args, &RegistryOptions{}, fn); err != nil {
return err
}
mf, _, err = GetManifest(mp)
if err != nil {
return fmt.Errorf("failed to open file after pull: %v", err)
}
} else {
return err
}
} else {
// create a model from this specified file
fn(api.ProgressResponse{Status: "creating model layer"})
file, err := os.Open(modelFile)
if err != nil {
return fmt.Errorf("failed to open file: %v", err)
}
defer file.Close()
ggml, err := llm.DecodeGGML(file)
if err != nil {
return err
}
config.ModelFormat = ggml.Name()
config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType()
// reset the file
file.Seek(0, io.SeekStart)
l, err := CreateLayer(file)
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
}
l.MediaType = "application/vnd.ollama.image.model"
layers = append(layers, l)
}
}
if mf != nil {
fn(api.ProgressResponse{Status: "reading model metadata"})
sourceBlobPath, err := GetBlobsPath(mf.Config.Digest)
fromConfigPath, err := GetBlobsPath(manifest.Config.Digest)
if err != nil {
return err
}
sourceBlob, err := os.Open(sourceBlobPath)
fromConfigFile, err := os.Open(fromConfigPath)
if err != nil {
return err
}
defer sourceBlob.Close()
defer fromConfigFile.Close()
var source ConfigV2
if err := json.NewDecoder(sourceBlob).Decode(&source); err != nil {
var fromConfig ConfigV2
if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
return err
}
// copy the model metadata
config.ModelFamily = source.ModelFamily
config.ModelType = source.ModelType
config.ModelFormat = source.ModelFormat
config.FileType = source.FileType
config.ModelFormat = fromConfig.ModelFormat
config.ModelFamily = fromConfig.ModelFamily
config.ModelType = fromConfig.ModelType
config.FileType = fromConfig.FileType
for _, l := range mf.Layers {
if l.MediaType == "application/vnd.ollama.image.params" {
sourceParamsBlobPath, err := GetBlobsPath(l.Digest)
for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = struct{}{}
if layer.MediaType == "application/vnd.ollama.image.params" {
fromParamsPath, err := GetBlobsPath(layer.Digest)
if err != nil {
return err
}
sourceParamsBlob, err := os.Open(sourceParamsBlobPath)
fromParamsFile, err := os.Open(fromParamsPath)
if err != nil {
return err
}
defer sourceParamsBlob.Close()
defer fromParamsFile.Close()
if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil {
if err := json.NewDecoder(fromParamsFile).Decode(&fromParams); err != nil {
return err
}
}
newLayer, err := GetLayerWithBufferFromLayer(l)
layer, err := GetLayerWithBufferFromLayer(layer)
if err != nil {
return err
}
newLayer.From = mp.GetShortTagname()
layers = append(layers, newLayer)
}
}
case "adapter":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
fp, err := filenameWithPath(path, c.Args)
layer.From = modelpath.GetShortTagname()
layers = append(layers, layer)
}
deleteMap[manifest.Config.Digest] = struct{}{}
continue
}
defer bin.Close()
fn(api.ProgressResponse{Status: "creating model layer"})
ggml, err := llm.DecodeGGML(bin)
if err != nil {
return err
}
// create a model from this specified file
fn(api.ProgressResponse{Status: "creating model layer"})
config.ModelFormat = ggml.Name()
config.ModelFamily = ggml.ModelFamily()
config.ModelType = ggml.ModelType()
config.FileType = ggml.FileType()
file, err := os.Open(fp)
bin.Seek(0, io.SeekStart)
layer, err := CreateLayer(bin)
if err != nil {
return fmt.Errorf("failed to open file: %v", err)
return err
}
defer file.Close()
l, err := CreateLayer(file)
layer.MediaType = mediatype
layers = append(layers, layer)
case "adapter":
fn(api.ProgressResponse{Status: "creating adapter layer"})
bin, err := os.Open(realpath(c.Args))
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
return err
}
l.MediaType = "application/vnd.ollama.image.adapter"
layers = append(layers, l)
case "license":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
defer bin.Close()
layer, err := CreateLayer(strings.NewReader(c.Args))
layer, err := CreateLayer(bin)
if err != nil {
return err
}
if layer.Size > 0 {
layer.MediaType = mediaType
layer.MediaType = mediatype
layers = append(layers, layer)
}
case "template", "system", "prompt":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
// remove the layer if one exists
mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
layers = removeLayerFromLayers(layers, mediaType)
case "license":
fn(api.ProgressResponse{Status: "creating license layer"})
layer, err := CreateLayer(strings.NewReader(c.Args))
if err != nil {
return err
}
if layer.Size > 0 {
layer.MediaType = mediatype
layers = append(layers, layer)
}
case "template", "system":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating %s layer", c.Name)})
// remove duplicate layers
layers = removeLayerFromLayers(layers, mediatype)
layer, err := CreateLayer(strings.NewReader(c.Args))
if err != nil {
@@ -447,48 +410,47 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
}
if layer.Size > 0 {
layer.MediaType = mediaType
layer.MediaType = mediatype
layers = append(layers, layer)
}
default:
// runtime parameters, build a list of args for each parameter to allow multiple values to be specified (ex: multiple stop sequences)
params[c.Name] = append(params[c.Name], c.Args)
}
}
// Create a single layer for the parameters
if len(params) > 0 {
fn(api.ProgressResponse{Status: "creating parameter layer"})
fn(api.ProgressResponse{Status: "creating parameters layer"})
layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
formattedParams, err := formatParams(params)
if err != nil {
return fmt.Errorf("couldn't create params json: %v", err)
return err
}
for k, v := range sourceParams {
for k, v := range fromParams {
if _, ok := formattedParams[k]; !ok {
formattedParams[k] = v
}
}
if config.ModelType == "65B" {
if numGQA, ok := formattedParams["num_gqa"].(int); ok && numGQA == 8 {
if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 {
config.ModelType = "70B"
}
}
bts, err := json.Marshal(formattedParams)
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(formattedParams); err != nil {
return err
}
fn(api.ProgressResponse{Status: "creating config layer"})
layer, err := CreateLayer(bytes.NewReader(b.Bytes()))
if err != nil {
return err
}
l, err := CreateLayer(bytes.NewReader(bts))
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
}
l.MediaType = "application/vnd.ollama.image.params"
layers = append(layers, l)
layer.MediaType = "application/vnd.ollama.image.params"
layers = append(layers, layer)
}
digests, err := getLayerDigests(layers)
@@ -496,36 +458,31 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
return err
}
var manifestLayers []*Layer
for _, l := range layers {
manifestLayers = append(manifestLayers, &l.Layer)
delete(deleteMap, l.Layer.Digest)
}
// Create a layer for the config object
fn(api.ProgressResponse{Status: "creating config layer"})
cfg, err := createConfigLayer(config, digests)
configLayer, err := createConfigLayer(config, digests)
if err != nil {
return err
}
layers = append(layers, cfg)
delete(deleteMap, cfg.Layer.Digest)
layers = append(layers, configLayer)
delete(deleteMap, configLayer.Digest)
if err := SaveLayers(layers, fn, false); err != nil {
return err
}
// Create the manifest
var contentLayers []*Layer
for _, layer := range layers {
contentLayers = append(contentLayers, &layer.Layer)
delete(deleteMap, layer.Digest)
}
fn(api.ProgressResponse{Status: "writing manifest"})
err = CreateManifest(name, cfg, manifestLayers)
if err != nil {
if err := CreateManifest(name, configLayer, contentLayers); err != nil {
return err
}
if noprune == "" {
fn(api.ProgressResponse{Status: "removing any unused layers"})
err = deleteUnusedLayers(nil, deleteMap, false)
if err != nil {
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
if err := deleteUnusedLayers(nil, deleteMap, false); err != nil {
return err
}
}
@@ -737,7 +694,7 @@ func CopyModel(src, dest string) error {
return nil
}
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dryRun bool) error {
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}, dryRun bool) error {
fp, err := GetManifestPath()
if err != nil {
return err
@@ -777,21 +734,19 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry
}
// only delete the files which are still in the deleteMap
for k, v := range deleteMap {
if v {
fp, err := GetBlobsPath(k)
if err != nil {
log.Printf("couldn't get file path for '%s': %v", k, err)
for k := range deleteMap {
fp, err := GetBlobsPath(k)
if err != nil {
log.Printf("couldn't get file path for '%s': %v", k, err)
continue
}
if !dryRun {
if err := os.Remove(fp); err != nil {
log.Printf("couldn't remove file '%s': %v", fp, err)
continue
}
if !dryRun {
if err := os.Remove(fp); err != nil {
log.Printf("couldn't remove file '%s': %v", fp, err)
continue
}
} else {
log.Printf("wanted to remove: %s", fp)
}
} else {
log.Printf("wanted to remove: %s", fp)
}
}
@@ -799,7 +754,7 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]bool, dry
}
func PruneLayers() error {
deleteMap := make(map[string]bool)
deleteMap := make(map[string]struct{})
p, err := GetBlobsPath("")
if err != nil {
return err
@@ -816,7 +771,9 @@ func PruneLayers() error {
if runtime.GOOS == "windows" {
name = strings.ReplaceAll(name, "-", ":")
}
deleteMap[name] = true
if strings.HasPrefix(name, "sha256:") {
deleteMap[name] = struct{}{}
}
}
log.Printf("total blobs: %d", len(deleteMap))
@@ -871,11 +828,11 @@ func DeleteModel(name string) error {
return err
}
deleteMap := make(map[string]bool)
deleteMap := make(map[string]struct{})
for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = true
deleteMap[layer.Digest] = struct{}{}
}
deleteMap[manifest.Config.Digest] = true
deleteMap[manifest.Config.Digest] = struct{}{}
err = deleteUnusedLayers(&mp, deleteMap, false)
if err != nil {
@@ -977,6 +934,9 @@ func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
for _, layer := range layers {
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
log.Printf("error uploading blob: %v", err)
if errors.Is(err, errUnauthorized) {
return fmt.Errorf("unable to push %s, make sure this namespace exists and you are authorized to push to it", ParseModelPath(name).GetNamespaceRepository())
}
return err
}
}
@@ -1011,7 +971,7 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
var noprune string
// build deleteMap to prune unused layers
deleteMap := make(map[string]bool)
deleteMap := make(map[string]struct{})
if noprune = os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
manifest, _, err = GetManifest(mp)
@@ -1021,9 +981,9 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = true
deleteMap[l.Digest] = struct{}{}
}
deleteMap[manifest.Config.Digest] = true
deleteMap[manifest.Config.Digest] = struct{}{}
}
}
@@ -1163,44 +1123,52 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
}
var errUnauthorized = fmt.Errorf("unauthorized")
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
for try := 0; try < maxRetries; try++ {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil {
log.Printf("couldn't start upload: %v", err)
return nil, err
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil {
if !errors.Is(err, context.Canceled) {
log.Printf("request failed: %v", err)
}
switch {
case resp.StatusCode == http.StatusUnauthorized:
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir)
return nil, err
}
switch {
case resp.StatusCode == http.StatusUnauthorized:
// Handle authentication error with one retry
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir)
if err != nil {
return nil, err
}
regOpts.Token = token
if body != nil {
_, err = body.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
regOpts.Token = token
if body != nil {
body.Seek(0, io.SeekStart)
}
continue
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
default:
return resp, nil
}
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if resp.StatusCode == http.StatusUnauthorized {
return nil, errUnauthorized
}
return resp, err
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
}
return nil, errMaxRetriesExceeded
return resp, nil
}
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {

View File

@@ -2,6 +2,7 @@ package server
import (
"context"
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
@@ -26,6 +27,7 @@ import (
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/version"
)
@@ -409,8 +411,31 @@ func CreateModelHandler(c *gin.Context) {
return
}
if req.Name == "" || req.Path == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name and path are required"})
if req.Name == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
return
}
if req.Path == "" && req.Modelfile == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
return
}
var modelfile io.Reader = strings.NewReader(req.Modelfile)
if req.Path != "" && req.Modelfile == "" {
bin, err := os.Open(req.Path)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
return
}
defer bin.Close()
modelfile = bin
}
commands, err := parser.Parse(modelfile)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
@@ -424,7 +449,7 @@ func CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
if err := CreateModel(ctx, req.Name, commands, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -625,6 +650,60 @@ func CopyModelHandler(c *gin.Context) {
}
}
func HeadBlobHandler(c *gin.Context) {
path, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
if _, err := os.Stat(path); err != nil {
c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
return
}
c.Status(http.StatusOK)
}
func CreateBlobHandler(c *gin.Context) {
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
hash := sha256.New()
temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
defer temp.Close()
defer os.Remove(temp.Name())
if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"})
return
}
if err := temp.Close(); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := os.Rename(temp.Name(), targetPath); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.Status(http.StatusCreated)
}
var defaultAllowOrigins = []string{
"localhost",
"127.0.0.1",
@@ -684,6 +763,8 @@ func Serve(ln net.Listener, allowOrigins []string) error {
r.POST("/api/copy", CopyModelHandler)
r.DELETE("/api/delete", DeleteModelHandler)
r.POST("/api/show", ShowModelHandler)
r.POST("/api/blobs/:digest", CreateBlobHandler)
r.HEAD("/api/blobs/:digest", HeadBlobHandler)
for _, method := range []string{http.MethodGet, http.MethodHead} {
r.Handle(method, "/", func(c *gin.Context) {
@@ -713,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
if runtime.GOOS == "linux" {
// check compatibility to log warnings
if _, err := llm.CheckVRAM(); err != nil {
log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
log.Printf(err.Error())
}
}
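A client-side sketch of how the new `/api/blobs/:digest` endpoints pair with the `@<digest>` handling in `CreateModel`; the file name, server address, and overall flow are assumptions (the corresponding CLI changes are not shown in full here), not part of this hunk:

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
)

func main() {
	f, err := os.Open("model.gguf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		log.Fatal(err)
	}
	digest := fmt.Sprintf("sha256:%x", h.Sum(nil))

	// HEAD reports whether the server already has the blob.
	resp, err := http.Head("http://localhost:11434/api/blobs/" + digest)
	if err != nil {
		log.Fatal(err)
	}
	resp.Body.Close()

	if resp.StatusCode == http.StatusNotFound {
		// POST the raw bytes; the handler verifies the digest before moving
		// the temporary file into the blobs directory.
		if _, err := f.Seek(0, io.SeekStart); err != nil {
			log.Fatal(err)
		}
		post, err := http.Post("http://localhost:11434/api/blobs/"+digest, "application/octet-stream", f)
		if err != nil {
			log.Fatal(err)
		}
		post.Body.Close()
	}

	// The modelfile sent to /api/create can then reference the blob by digest.
	modelfile := fmt.Sprintf("FROM @%s", digest)
	_ = modelfile
}
```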

View File

@@ -5,9 +5,9 @@ import (
"crypto/md5"
"errors"
"fmt"
"hash"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
@@ -35,6 +35,8 @@ type blobUpload struct {
context.CancelFunc
file *os.File
done bool
err error
references atomic.Int32
@@ -42,8 +44,8 @@ type blobUpload struct {
const (
numUploadParts = 64
minUploadPartSize int64 = 95 * 1000 * 1000
maxUploadPartSize int64 = 1000 * 1000 * 1000
minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * format.MegaByte
)
func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@@ -55,7 +57,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
if b.From != "" {
values := requestURL.Query()
values.Add("mount", b.Digest)
values.Add("from", b.From)
values.Add("from", ParseModelPath(b.From).GetNamespaceRepository())
requestURL.RawQuery = values.Encode()
}
@@ -77,6 +79,14 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
b.Total = fi.Size()
// http.StatusCreated indicates a blob has been mounted
// ref: https://distribution.github.io/distribution/spec/api/#cross-repository-blob-mount
if resp.StatusCode == http.StatusCreated {
b.Completed.Store(b.Total)
b.done = true
return nil
}
var size = b.Total / numUploadParts
switch {
case size < minUploadPartSize:
@@ -120,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
return
}
f, err := os.Open(p)
b.file, err = os.Open(p)
if err != nil {
b.err = err
return
}
defer f.Close()
defer b.file.Close()
g, inner := errgroup.WithContext(ctx)
g.SetLimit(numUploadParts)
@@ -137,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
switch {
case errors.Is(err, context.Canceled):
@@ -145,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
case errors.Is(err, errMaxRetriesExceeded):
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
}
@@ -165,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
requestURL := <-b.nextURL
var sb strings.Builder
// calculate md5 checksum and add it to the commit request
for _, part := range b.Parts {
sb.Write(part.Sum(nil))
hash := md5.New()
if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
b.err = err
return
}
sb.Write(hash.Sum(nil))
}
md5sum := md5.Sum([]byte(sb.String()))
@@ -180,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", "0")
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if err != nil {
b.err = err
for try := 0; try < maxRetries; try++ {
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if err != nil {
b.err = err
if errors.Is(err, context.Canceled) {
return
}
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
time.Sleep(sleep)
continue
}
defer resp.Body.Close()
b.err = nil
b.done = true
return
}
defer resp.Body.Close()
b.done = true
}
func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
part.Reset()
headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
headers.Set("X-Redirect-Uploads", "1")
if method == http.MethodPatch {
headers.Set("X-Redirect-Uploads", "1")
headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
}
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts)
sr := io.NewSectionReader(b.file, part.Offset, part.Size)
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
if err != nil {
return err
}
@@ -227,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err
}
// retry uploading to the redirect URL
for try := 0; try < maxRetries; try++ {
err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
switch {
@@ -235,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
case errors.Is(err, errMaxRetriesExceeded):
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
}
@@ -260,7 +294,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err
}
return fmt.Errorf("http status %d %s: %s", resp.StatusCode, resp.Status, body)
return fmt.Errorf("http status %s: %s", resp.Status, body)
}
if method == http.MethodPatch {
@@ -293,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
}
fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", b.Digest),
Status: fmt.Sprintf("pushing %s", b.Digest[7:19]),
Digest: b.Digest,
Total: b.Total,
Completed: b.Completed.Load(),
@@ -307,14 +341,10 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
type blobUploadPart struct {
// N is the part number
N int
Offset int64
Size int64
hash.Hash
N int
Offset int64
Size int64
written int64
io.ReadSeeker
*blobUpload
}
@@ -326,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
}
func (p *blobUploadPart) Reset() {
p.Seek(0, io.SeekStart)
p.Completed.Add(-int64(p.written))
p.written = 0
p.Hash = md5.New()
}
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@@ -344,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
default:
defer resp.Body.Close()
fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", layer.Digest),
Status: fmt.Sprintf("pushing %s", layer.Digest[7:19]),
Digest: layer.Digest,
Total: layer.Size,
Completed: layer.Size,