use a pulsating spinner

only set main_gpu if value > 0 is provided
remove redundant filename parameter (#1213)
2026-01-14 02:18:04 -05:00 · 2023-11-20 17:27:53 -08:00 · 2023-11-20 19:54:04 -05:00 · 2023-11-20 17:05:36 -05:00 · 2023-11-20 13:56:39 -08:00 · 2023-11-20 13:44:31 -08:00
28 changed files with 766 additions and 1577 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,3 +6,4 @@ scripts
 llm/llama.cpp/ggml
 llm/llama.cpp/gguf
 .env
+.cache
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 dist
 ollama
 ggml-metal.metal
+.cache
--- a/README.md
+++ b/README.md
@@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models.
 For example, to generate text from a model:

 ```
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt":"Why is the sky blue?"
 }'
@@ -234,11 +234,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Emacs client](https://github.com/zweifisch/ollama)
 - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
 - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
+- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
 - [gptel Emacs client](https://github.com/karthink/gptel)

 ### Libraries

 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
+- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -247,6 +249,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
+- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)

 ### Mobile

@@ -262,3 +265,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
+- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -30,7 +30,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/format"
 	"github.com/jmorganca/ollama/parser"
-	"github.com/jmorganca/ollama/progressbar"
+	"github.com/jmorganca/ollama/progress"
 	"github.com/jmorganca/ollama/readline"
 	"github.com/jmorganca/ollama/server"
 	"github.com/jmorganca/ollama/version"
@@ -48,14 +48,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()
+
+	bars := make(map[string]*progress.Bar)
+
 	modelfile, err := os.ReadFile(filename)
 	if err != nil {
 		return err
 	}

-	spinner := NewSpinner("transferring context")
-	go spinner.Spin(100 * time.Millisecond)
-
 	commands, err := parser.Parse(bytes.NewReader(modelfile))
 	if err != nil {
 		return err
@@ -66,6 +68,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

+	status := "transferring model data"
+	spinner := progress.NewSpinner(status)
+	p.Add(status, spinner)
+
 	for _, c := range commands {
 		switch c.Name {
 		case "model", "adapter":
@@ -76,6 +82,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = filepath.Join(home, path[2:])
 			}

+			if !filepath.IsAbs(path) {
+				path = filepath.Join(filepath.Dir(filename), path)
+			}
+
 			bin, err := os.Open(path)
 			if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
 				continue
@@ -99,41 +109,34 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		}
 	}

-	var currentDigest string
-	var bar *progressbar.ProgressBar
-
-	request := api.CreateRequest{Name: args[0], Path: filename, Modelfile: string(modelfile)}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
+		if resp.Digest != "" {
 			spinner.Stop()
-			currentDigest = resp.Digest
-			// pulling
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				resp.Status,
-			)
-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
+
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
 			spinner.Stop()
-			spinner = NewSpinner(resp.Status)
-			go spinner.Spin(100 * time.Millisecond)
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}

 		return nil
 	}

+	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
 	if err := client.Create(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	spinner.Stop()
-	if spinner.description != "success" {
-		return errors.New("unexpected end to create model")
-	}
-
 	return nil
 }

@@ -170,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	var currentDigest string
-	var bar *progressbar.ProgressBar
+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()
+
+	bars := make(map[string]*progress.Bar)
+	var status string
+	var spinner *progress.Spinner

-	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
-			currentDigest = resp.Digest
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
-			)
+		if resp.Digest != "" {
+			if spinner != nil {
+				spinner.Stop()
+			}

-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
-			fmt.Println(resp.Status)
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}
+
 		return nil
 	}

+	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	if err := client.Push(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	if bar != nil && !bar.IsFinished() {
-		return errors.New("unexpected end to push model")
-	}
-
+	spinner.Stop()
 	return nil
 }

@@ -350,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	return pull(args[0], insecure)
-}
-
-func pull(model string, insecure bool) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}

-	var currentDigest string
-	var bar *progressbar.ProgressBar
+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()
+
+	bars := make(map[string]*progress.Bar)
+
+	var status string
+	var spinner *progress.Spinner

-	request := api.PullRequest{Name: model, Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
-			currentDigest = resp.Digest
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
-			)
+		if resp.Digest != "" {
+			if spinner != nil {
+				spinner.Stop()
+			}

-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
-			fmt.Println(resp.Status)
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}

 		return nil
 	}

+	request := api.PullRequest{Name: args[0], Insecure: insecure}
 	if err := client.Pull(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	if bar != nil && !bar.IsFinished() {
-		return errors.New("unexpected end to pull model")
-	}
-
 	return nil
 }

@@ -442,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 		return err
 	}

-	spinner := NewSpinner("")
-	go spinner.Spin(60 * time.Millisecond)
+	p := progress.NewProgress(os.Stderr)
+	defer p.StopAndClear()
+
+	spinner := progress.NewSpinner("")
+	p.Add("", spinner)

 	var latest api.GenerateResponse

@@ -475,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st

 	request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
 	fn := func(response api.GenerateResponse) error {
-		if !spinner.IsFinished() {
-			spinner.Finish()
-		}
+		p.StopAndClear()

 		latest = response

@@ -511,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st

 	if err := client.Generate(cancelCtx, &request, fn); err != nil {
 		if strings.Contains(err.Error(), "context canceled") && abort {
-			spinner.Finish()
 			return nil
 		}
 		return err
--- a/cmd/spinner.go
+++ b/cmd/spinner.go
@@ -1,44 +0,0 @@
-package cmd
-
-import (
-	"fmt"
-	"os"
-	"time"
-
-	"github.com/jmorganca/ollama/progressbar"
-)
-
-type Spinner struct {
-	description string
-	*progressbar.ProgressBar
-}
-
-func NewSpinner(description string) *Spinner {
-	return &Spinner{
-		description: description,
-		ProgressBar: progressbar.NewOptions(-1,
-			progressbar.OptionSetWriter(os.Stderr),
-			progressbar.OptionThrottle(60*time.Millisecond),
-			progressbar.OptionSpinnerType(14),
-			progressbar.OptionSetRenderBlankState(true),
-			progressbar.OptionSetElapsedTime(false),
-			progressbar.OptionClearOnFinish(),
-			progressbar.OptionSetDescription(description),
-		),
-	}
-}
-
-func (s *Spinner) Spin(tick time.Duration) {
-	for range time.Tick(tick) {
-		if s.IsFinished() {
-			break
-		}
-
-		s.Add(1)
-	}
-}
-
-func (s *Spinner) Stop() {
-	s.Finish()
-	fmt.Println(s.description)
-}
--- a/docs/api.md
+++ b/docs/api.md
@@ -51,14 +51,16 @@ Advanced parameters (optional):

 ### JSON mode

-Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.

 ### Examples

 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "Why is the sky blue?"
 }'
@@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s),
 #### Request (No streaming)

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
  "prompt": "Why is the sky blue?",
  "stream": false
 }'
@@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "context": [1, 2, 3],
@@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "mistral",
  "prompt": "[INST] why is the sky blue? [/INST]",
  "raw": true,
@@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
 #### Request (JSON mode)

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "What color is the sky at different times of the day? Respond using JSON",
  "format": "json",
@@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to:
 If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
  "prompt": "Why is the sky blue?",
  "stream": false,
  "options": {
@@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "context": [1, 2, 3],
@@ -297,19 +299,18 @@ Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `m
 ### Parameters

 - `name`: name of the model to create
- `path`: path to the Modelfile (deprecated: please use modelfile instead)
 - `modelfile`: contents of the Modelfile
 - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+- `path` (deprecated): path to the Modelfile

 ### Examples

 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/create -d '{
+curl http://localhost:11434/api/create -d '{
  "name": "mario",
-  "path": "~/Modelfile",
-  "modelfile": "FROM llama2"
+  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```

@@ -395,7 +396,7 @@ A single JSON object will be returned.
 {
  "models": [
    {
-      "name": "llama2:7b",
+      "name": "llama2",
      "modified_at": "2023-08-02T17:02:23.713454393-07:00",
      "size": 3791730596
    },
@@ -426,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a

 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama2:7b"
+  "name": "llama2"
 }'
 ```

@@ -455,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model.

 ```shell
 curl http://localhost:11434/api/copy -d '{
-  "source": "llama2:7b",
+  "source": "llama2",
  "destination": "llama2-backup"
 }'
 ```
@@ -509,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/pull -d '{
-  "name": "llama2:7b"
+curl http://localhost:11434/api/pull -d '{
+  "name": "llama2"
 }'
 ```

@@ -581,7 +582,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/push -d '{
+curl http://localhost:11434/api/push -d '{
  "name": "mattw/pygmalion:latest"
 }'
 ```
@@ -649,8 +650,8 @@ Advanced parameters:
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/embeddings -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "llama2",
  "prompt": "Here is an article about llamas..."
 }'
 ```
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -148,3 +148,9 @@ Build and run this image:
 docker build -t ollama-with-ca .
 docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
 ```
+
+## How do I use Ollama with GPU acceleration in Docker?
+
+The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
+
+GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -41,6 +41,8 @@ INSTRUCTION arguments

 ## Examples

+### Basic `Modelfile`
+
 An example of a `Modelfile` creating a mario blueprint:

 ```modelfile
@@ -63,6 +65,35 @@ To use this:

 More examples are available in the [examples directory](../examples).

+### `Modelfile`s in [ollama.ai/library][1]
+
+There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
+
+- Option 1: view a details page from a model's tags page:
+   1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
+   2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
+   3. Scroll down to "Layers"
+      - Note: if the [`FROM` instruction](#from-required) is not present,
+        it means the model was created from a local file
+- Option 2: use `ollama show` to print the `Modelfile` like so:
+
+  ```bash
+  > ollama show --modelfile llama2:13b
+  # Modelfile generated by "ollama show"
+  # To build a new Modelfile based on this one, replace the FROM line with:
+  # FROM llama2:13b
+
+  FROM /root/.ollama/models/blobs/sha256:123abc
+  TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
+
+  {{ end }}{{ .Prompt }} [/INST] """
+  SYSTEM """"""
+  PARAMETER stop [INST]
+  PARAMETER stop [/INST]
+  PARAMETER stop <<SYS>>
+  PARAMETER stop <</SYS>>
+  ```
+
 ## Instructions

 ### FROM (Required)
@@ -177,3 +208,5 @@ LICENSE """

 - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
 - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
+
+[1]: https://ollama.ai/library
--- a/examples/jupyter-notebook/README.md
+++ b/examples/jupyter-notebook/README.md
@@ -0,0 +1,5 @@
+# Ollama Jupyter Notebook
+
+This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
+
+For best results, use an instance with a GPU accelerator.
--- a/examples/jupyter-notebook/ollama.ipynb
+++ b/examples/jupyter-notebook/ollama.ipynb
@@ -0,0 +1,102 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93f59dcb-c588-41b8-a792-55d88ade739c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and run the Ollama Linux install script\n",
+    "!curl https://ollama.ai/install.sh | sh\n",
+    "!command -v systemctl >/dev/null && sudo systemctl stop ollama"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "658c147e-c7f8-490e-910e-62b80f577dda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install aiohttp pyngrok\n",
+    "\n",
+    "import os\n",
+    "import asyncio\n",
+    "from aiohttp import ClientSession\n",
+    "\n",
+    "# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
+    "# over the built-in library. This is particularly important for \n",
+    "# Google Colab which installs older drivers\n",
+    "os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
+    "\n",
+    "async def run(cmd):\n",
+    "  '''\n",
+    "  run is a helper function to run subcommands asynchronously.\n",
+    "  '''\n",
+    "  print('>>> starting', *cmd)\n",
+    "  p = await asyncio.subprocess.create_subprocess_exec(\n",
+    "      *cmd,\n",
+    "      stdout=asyncio.subprocess.PIPE,\n",
+    "      stderr=asyncio.subprocess.PIPE,\n",
+    "  )\n",
+    "\n",
+    "  async def pipe(lines):\n",
+    "    async for line in lines:\n",
+    "      print(line.strip().decode('utf-8'))\n",
+    "\n",
+    "  await asyncio.gather(\n",
+    "      pipe(p.stdout),\n",
+    "      pipe(p.stderr),\n",
+    "  )\n",
+    "\n",
+    "\n",
+    "await asyncio.gather(\n",
+    "    run(['ollama', 'serve']),\n",
+    "    run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
+   "metadata": {},
+   "source": [
+    "The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
+    "\n",
+    "```\n",
+    "t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
+    "```\n",
+    "\n",
+    "The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
+    "\n",
+    "```bash\n",
+    "export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
+    "ollama list\n",
+    "ollama run mistral\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/format/bytes.go
+++ b/format/bytes.go
@@ -1,23 +1,45 @@
 package format

-import "fmt"
+import (
+	"fmt"
+	"math"
+)

 const (
 	Byte     = 1
 	KiloByte = Byte * 1000
 	MegaByte = KiloByte * 1000
 	GigaByte = MegaByte * 1000
+	TeraByte = GigaByte * 1000
 )

 func HumanBytes(b int64) string {
+	var value float64
+	var unit string
+
 	switch {
-	case b > GigaByte:
-		return fmt.Sprintf("%.1f GB", float64(b)/GigaByte)
-	case b > MegaByte:
-		return fmt.Sprintf("%.1f MB", float64(b)/MegaByte)
-	case b > KiloByte:
-		return fmt.Sprintf("%.1f KB", float64(b)/KiloByte)
+	case b >= TeraByte:
+		value = float64(b) / TeraByte
+		unit = "TB"
+	case b >= GigaByte:
+		value = float64(b) / GigaByte
+		unit = "GB"
+	case b >= MegaByte:
+		value = float64(b) / MegaByte
+		unit = "MB"
+	case b >= KiloByte:
+		value = float64(b) / KiloByte
+		unit = "KB"
 	default:
 		return fmt.Sprintf("%d B", b)
 	}
+
+	switch {
+	case value >= 100:
+		return fmt.Sprintf("%d %s", int(value), unit)
+	case value != math.Trunc(value):
+		return fmt.Sprintf("%.1f %s", value, unit)
+	default:
+		return fmt.Sprintf("%d %s", int(value), unit)
+	}
 }
--- a/llm/llama.cpp/generate_darwin_amd64.go
+++ b/llm/llama.cpp/generate_darwin_amd64.go
@@ -7,13 +7,13 @@ package llm
 //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
 //go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
 //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
-//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
 //go:generate cmake --build ggml/build/cpu --target server --config Release
 //go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner

 //go:generate git submodule update --force gguf
 //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
 //go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
-//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
+//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
 //go:generate cmake --build gguf/build/cpu --target server --config Release
 //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -226,7 +226,7 @@ type llama struct {
 }

 var (
-	errNvidiaSMI     = errors.New("nvidia-smi command failed")
+	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
 	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
 )

@@ -343,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		"--embedding",
 	}

+	if opts.MainGPU > 0 {
+		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
+	}
+
 	if opts.RopeFrequencyBase > 0 {
 		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
 	}
@@ -544,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 		"stream":            true,
 		"n_predict":         llm.NumPredict,
 		"n_keep":            llm.NumKeep,
+		"main_gpu":          llm.MainGPU,
 		"temperature":       llm.Temperature,
 		"top_k":             llm.TopK,
 		"top_p":             llm.TopP,
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error

 	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
-		case "Q8_0":
+		case "F32", "Q5_0", "Q5_1", "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 				// GGML Q8_0 do not support Metal API and will
 				// cause the runner to segmentation fault so disable GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
-		case "F32", "Q5_0", "Q5_1":
-			if opts.NumGPU != 0 {
-				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
-				// cause the runner to segmentation fault so disable GPU
-				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
-				opts.NumGPU = 0
-			}
 		}

 		var requiredMemory int64
--- a/main.go
+++ b/main.go
@@ -2,11 +2,25 @@ package main

 import (
 	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"

 	"github.com/jmorganca/ollama/cmd"
 	"github.com/spf13/cobra"
 )

 func main() {
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT)
+
+	go func() {
+		<-sigChan
+		fmt.Print("\033[?25h")
+
+		os.Exit(0)
+	}()
+
 	cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
 }
--- a/progress/bar.go
+++ b/progress/bar.go
@@ -0,0 +1,155 @@
+package progress
+
+import (
+	"fmt"
+	"math"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/jmorganca/ollama/format"
+	"golang.org/x/term"
+)
+
+type Stats struct {
+	rate      int64
+	value     int64
+	remaining time.Duration
+}
+
+type Bar struct {
+	message      string
+	messageWidth int
+
+	maxValue     int64
+	initialValue int64
+	currentValue int64
+
+	started time.Time
+
+	stats   Stats
+	statted time.Time
+}
+
+func NewBar(message string, maxValue, initialValue int64) *Bar {
+	return &Bar{
+		message:      message,
+		messageWidth: -1,
+		maxValue:     maxValue,
+		initialValue: initialValue,
+		currentValue: initialValue,
+		started:      time.Now(),
+	}
+}
+
+func (b *Bar) String() string {
+	termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
+	if err != nil {
+		termWidth = 80
+	}
+
+	var pre, mid, suf strings.Builder
+
+	if b.message != "" {
+		message := strings.TrimSpace(b.message)
+		if b.messageWidth > 0 && len(message) > b.messageWidth {
+			message = message[:b.messageWidth]
+		}
+
+		fmt.Fprintf(&pre, "%s", message)
+		if b.messageWidth-pre.Len() >= 0 {
+			pre.WriteString(strings.Repeat(" ", b.messageWidth-pre.Len()))
+		}
+
+		pre.WriteString(" ")
+	}
+
+	fmt.Fprintf(&pre, "%3.0f%% ", math.Floor(b.percent()))
+	fmt.Fprintf(&suf, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))
+
+	stats := b.Stats()
+	rate := int64(stats.rate)
+	if rate > 0 {
+		fmt.Fprintf(&suf, ", %s/s", format.HumanBytes(rate))
+	}
+
+	fmt.Fprintf(&suf, ")")
+
+	elapsed := time.Since(b.started)
+	if b.percent() < 100 && rate > 0 {
+		fmt.Fprintf(&suf, " [%s:%s]", elapsed.Round(time.Second), stats.remaining)
+	} else {
+		fmt.Fprintf(&suf, "        ")
+	}
+
+	mid.WriteString("▕")
+
+	// add 3 extra spaces: 2 boundary characters and 1 space at the end
+	f := termWidth - pre.Len() - suf.Len() - 3
+	n := int(float64(f) * b.percent() / 100)
+
+	if n > 0 {
+		mid.WriteString(strings.Repeat("█", n))
+	}
+
+	if f-n > 0 {
+		mid.WriteString(strings.Repeat(" ", f-n))
+	}
+
+	mid.WriteString("▏")
+
+	return pre.String() + mid.String() + suf.String()
+}
+
+func (b *Bar) Set(value int64) {
+	if value >= b.maxValue {
+		value = b.maxValue
+	}
+
+	b.currentValue = value
+}
+
+func (b *Bar) percent() float64 {
+	if b.maxValue > 0 {
+		return float64(b.currentValue) / float64(b.maxValue) * 100
+	}
+
+	return 0
+}
+
+func (b *Bar) Stats() Stats {
+	if time.Since(b.statted) < time.Second {
+		return b.stats
+	}
+
+	switch {
+	case b.statted.IsZero():
+		b.stats = Stats{
+			value:     b.initialValue,
+			rate:      0,
+			remaining: 0,
+		}
+	case b.currentValue >= b.maxValue:
+		b.stats = Stats{
+			value:     b.maxValue,
+			rate:      0,
+			remaining: 0,
+		}
+	default:
+		rate := b.currentValue - b.stats.value
+		var remaining time.Duration
+		if rate > 0 {
+			remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate)))
+		}
+
+		b.stats = Stats{
+			value:     b.currentValue,
+			rate:      rate,
+			remaining: remaining,
+		}
+	}
+
+	b.statted = time.Now()
+
+	return b.stats
+}
--- a/progress/progress.go
+++ b/progress/progress.go
@@ -0,0 +1,113 @@
+package progress
+
+import (
+	"fmt"
+	"io"
+	"sync"
+	"time"
+)
+
+type State interface {
+	String() string
+}
+
+type Progress struct {
+	mu sync.Mutex
+	w  io.Writer
+
+	pos int
+
+	ticker *time.Ticker
+	states []State
+}
+
+func NewProgress(w io.Writer) *Progress {
+	p := &Progress{w: w}
+	go p.start()
+	return p
+}
+
+func (p *Progress) stop() bool {
+	for _, state := range p.states {
+		if spinner, ok := state.(*Spinner); ok {
+			spinner.Stop()
+		}
+	}
+
+	if p.ticker != nil {
+		p.ticker.Stop()
+		p.ticker = nil
+		p.render()
+		return true
+	}
+
+	return false
+}
+
+func (p *Progress) Stop() bool {
+	stopped := p.stop()
+	if stopped {
+		fmt.Fprint(p.w, "\n")
+	}
+	return stopped
+}
+
+func (p *Progress) StopAndClear() bool {
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprint(p.w, "\033[?25h")
+
+	stopped := p.stop()
+	if stopped {
+		// clear all progress lines
+		for i := 0; i < p.pos; i++ {
+			if i > 0 {
+				fmt.Fprint(p.w, "\033[A")
+			}
+			fmt.Fprint(p.w, "\033[2K\033[1G")
+		}
+	}
+
+	return stopped
+}
+
+func (p *Progress) Add(key string, state State) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	p.states = append(p.states, state)
+}
+
+func (p *Progress) render() error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	// clear already rendered progress lines
+	for i := 0; i < p.pos; i++ {
+		if i > 0 {
+			fmt.Fprint(p.w, "\033[A")
+		}
+		fmt.Fprint(p.w, "\033[2K\033[1G")
+	}
+
+	// render progress lines
+	for i, state := range p.states {
+		fmt.Fprint(p.w, state.String())
+		if i < len(p.states)-1 {
+			fmt.Fprint(p.w, "\n")
+		}
+	}
+
+	p.pos = len(p.states)
+
+	return nil
+}
+
+func (p *Progress) start() {
+	p.ticker = time.NewTicker(100 * time.Millisecond)
+	fmt.Fprint(p.w, "\033[?25l")
+	defer fmt.Fprintln(p.w, "\033[?25h")
+
+	for range p.ticker.C {
+		p.render()
+	}
+}
--- a/progress/spinner.go
+++ b/progress/spinner.go
@@ -0,0 +1,72 @@
+package progress
+
+import (
+	"fmt"
+	"strings"
+	"time"
+)
+
+// Spinner is a State that shows a message followed by a single cell whose
+// background color is cycled by a background goroutine.
+type Spinner struct {
+	// message is the text shown before the spinner cell.
+	message      string
+	// messageWidth, when positive, is the width the message is truncated
+	// or padded to.
+	messageWidth int
+
+	// value is the current 256-color palette index used for the cell
+	// background; it cycles from 231 up to 255.
+	value int
+
+	// ticker paces the color updates performed in start.
+	ticker  *time.Ticker
+	// started is the time the spinner was created.
+	started time.Time
+	// stopped is the time Stop was first called; zero while running.
+	stopped time.Time
+}
+
+// NewSpinner creates a running Spinner labelled with message. The color
+// index starts at 231 and the animation goroutine is launched before the
+// spinner is returned.
+func NewSpinner(message string) *Spinner {
+	spinner := new(Spinner)
+	spinner.message = message
+	spinner.started = time.Now()
+	spinner.value = 231
+
+	go spinner.start()
+	return spinner
+}
+
+// String renders the spinner for display. A non-empty message is trimmed
+// of surrounding whitespace and, when messageWidth is positive, truncated
+// or space-padded to that many characters, followed by a single space.
+// While the spinner is running, a one-cell block using the current
+// background color is appended; once stopped only the message remains.
+//
+// Width is measured in runes rather than bytes so that truncation never
+// splits a multi-byte UTF-8 character. For ASCII messages the output is
+// the same as byte-based measurement.
+func (s *Spinner) String() string {
+	var sb strings.Builder
+	if len(s.message) > 0 {
+		message := strings.TrimSpace(s.message)
+		width := len([]rune(message))
+		if s.messageWidth > 0 && width > s.messageWidth {
+			message = string([]rune(message)[:s.messageWidth])
+			width = s.messageWidth
+		}
+
+		sb.WriteString(message)
+		if pad := s.messageWidth - width; pad > 0 {
+			sb.WriteString(strings.Repeat(" ", pad))
+		}
+
+		sb.WriteString(" ")
+	}
+
+	if s.stopped.IsZero() {
+		// 48;5;N selects background color N from the 256-color palette;
+		// 0m resets attributes after the single-space cell
+		fmt.Fprintf(&sb, "\033[48;5;%dm \033[0m", s.value)
+	}
+
+	return sb.String()
+}
+
+// start is the body of the goroutine launched by NewSpinner. Every 40ms it
+// advances value through the range 231..255, wrapping back to 231, and it
+// returns on the first tick after Stop has been called.
+func (s *Spinner) start() {
+	s.ticker = time.NewTicker(40 * time.Millisecond)
+	// release the ticker when the loop exits; a ticker that is never
+	// stopped keeps its resources alive
+	defer s.ticker.Stop()
+
+	for range s.ticker.C {
+		if s.value < 255 {
+			s.value++
+		} else {
+			s.value = 231
+		}
+		if !s.stopped.IsZero() {
+			return
+		}
+	}
+}
+
+// Stop marks the spinner as stopped by recording the current time. Only
+// the first call has an effect; later calls leave the recorded time as is.
+func (s *Spinner) Stop() {
+	if !s.stopped.IsZero() {
+		return
+	}
+
+	s.stopped = time.Now()
+}
--- a/progressbar/LICENSE
+++ b/progressbar/LICENSE
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2017 Zack
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/progressbar/README.md
+++ b/progressbar/README.md
@@ -1,121 +0,0 @@
-# progressbar
-
-[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
-[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar) 
-[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
-[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3) 
-
-A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
-
-
-## Install
-
-```
-go get -u github.com/schollz/progressbar/v3
-```
-
-## Usage 
-
-### Basic usage
-
-```golang
-bar := progressbar.Default(100)
-for i := 0; i < 100; i++ {
-    bar.Add(1)
-    time.Sleep(40 * time.Millisecond)
-}
-```
-
-which looks like:
-
-![Example of basic bar](examples/basic/basic.gif)
-
-
-### I/O operations
-
-The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`.
-
-```golang
-req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
-resp, _ := http.DefaultClient.Do(req)
-defer resp.Body.Close()
-
-f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
-defer f.Close()
-
-bar := progressbar.DefaultBytes(
-    resp.ContentLength,
-    "downloading",
-)
-io.Copy(io.MultiWriter(f, bar), resp.Body)
-```
-
-which looks like:
-
-![Example of download bar](examples/download/download.gif)
-
-
-### Progress bar with unknown length
-
-A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`.
-
-which looks like:
-
-![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
-
-
-### Customization
-
-There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
-
-```golang
-bar := progressbar.NewOptions(1000,
-    progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
-    progressbar.OptionEnableColorCodes(true),
-    progressbar.OptionShowBytes(true),
-    progressbar.OptionSetWidth(15),
-    progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
-    progressbar.OptionSetTheme(progressbar.Theme{
-        Saucer:        "[green]=[reset]",
-        SaucerHead:    "[green]>[reset]",
-        SaucerPadding: " ",
-        BarStart:      "[",
-        BarEnd:        "]",
-    }))
-for i := 0; i < 1000; i++ {
-    bar.Add(1)
-    time.Sleep(5 * time.Millisecond)
-}
-```
-
-which looks like:
-
-![Example of customized bar](examples/customization/customization.gif)
-
-
-## Contributing
-
-Pull requests are welcome. Feel free to...
-
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
-
-## Thanks
-
-Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
-
-Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
-
-Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
-
-Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
-
-Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
-
-Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
-
-## License
-
-MIT
--- a/progressbar/progressbar.go
+++ b/progressbar/progressbar.go
--- a/progressbar/spinners.go
+++ b/progressbar/spinners.go
@@ -1,80 +0,0 @@
-package progressbar
-
-var spinners = map[int][]string{
-	0:  {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
-	1:  {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
-	2:  {"▖", "▘", "▝", "▗"},
-	3:  {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
-	4:  {"◢", "◣", "◤", "◥"},
-	5:  {"◰", "◳", "◲", "◱"},
-	6:  {"◴", "◷", "◶", "◵"},
-	7:  {"◐", "◓", "◑", "◒"},
-	8:  {".", "o", "O", "@", "*"},
-	9:  {"|", "/", "-", "\\"},
-	10: {"◡◡", "⊙⊙", "◠◠"},
-	11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
-	12: {">))'>", " >))'>", "  >))'>", "   >))'>", "    >))'>", "   <'((<", "  <'((<", " <'((<"},
-	13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
-	14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
-	15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
-	16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
-	17: {"■", "□", "▪", "▫"},
-	18: {"←", "↑", "→", "↓"},
-	19: {"╫", "╪"},
-	20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
-	21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
-	22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
-	23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
-	24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
-	25: {"ｦ", "ｧ", "ｨ", "ｩ", "ｪ", "ｫ", "ｬ", "ｭ", "ｮ", "ｯ", "ｱ", "ｲ", "ｳ", "ｴ", "ｵ", "ｶ", "ｷ", "ｸ", "ｹ", "ｺ", "ｻ", "ｼ", "ｽ", "ｾ", "ｿ", "ﾀ", "ﾁ", "ﾂ", "ﾃ", "ﾄ", "ﾅ", "ﾆ", "ﾇ", "ﾈ", "ﾉ", "ﾊ", "ﾋ", "ﾌ", "ﾍ", "ﾎ", "ﾏ", "ﾐ", "ﾑ", "ﾒ", "ﾓ", "ﾔ", "ﾕ", "ﾖ", "ﾗ", "ﾘ", "ﾙ", "ﾚ", "ﾛ", "ﾜ", "ﾝ"},
-	26: {".", "..", "..."},
-	27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
-	28: {".", "o", "O", "°", "O", "o", "."},
-	29: {"+", "x"},
-	30: {"v", "<", "^", ">"},
-	31: {">>--->", " >>--->", "  >>--->", "   >>--->", "    >>--->", "    <---<<", "   <---<<", "  <---<<", " <---<<", "<---<<"},
-	32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
-	33: {"[          ]", "[=         ]", "[==        ]", "[===       ]", "[====      ]", "[=====     ]", "[======    ]", "[=======   ]", "[========  ]", "[========= ]", "[==========]"},
-	34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
-	35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
-	36: {"[                    ]", "[=>                  ]", "[===>                ]", "[=====>              ]", "[======>             ]", "[========>           ]", "[==========>         ]", "[============>       ]", "[==============>     ]", "[================>   ]", "[==================> ]", "[===================>]"},
-	37: {"ဝ", "၀"},
-	38: {"▌", "▀", "▐▄"},
-	39: {"🌍", "🌎", "🌏"},
-	40: {"◜", "◝", "◞", "◟"},
-	41: {"⬒", "⬔", "⬓", "⬕"},
-	42: {"⬖", "⬘", "⬗", "⬙"},
-	43: {"[>>>          >]", "[]>>>>        []", "[]  >>>>      []", "[]    >>>>    []", "[]      >>>>  []", "[]        >>>>[]", "[>>          >>]"},
-	44: {"♠", "♣", "♥", "♦"},
-	45: {"➞", "➟", "➠", "➡", "➠", "➟"},
-	46: {"  |  ", ` \   `, "_    ", ` \   `, "  |  ", "   / ", "    _", "   / "},
-	47: {"  . . . .", ".   . . .", ". .   . .", ". . .   .", ". . . .  ", ". . . . ."},
-	48: {" |     ", "  /    ", "   _   ", `    \  `, "     | ", `    \  `, "   _   ", "  /    "},
-	49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
-	50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
-	51: {"[    ]", "[   =]", "[  ==]", "[ ===]", "[====]", "[=== ]", "[==  ]", "[=   ]"},
-	52: {"( ●    )", "(  ●   )", "(   ●  )", "(    ● )", "(     ●)", "(    ● )", "(   ●  )", "(  ●   )", "( ●    )"},
-	53: {"✶", "✸", "✹", "✺", "✹", "✷"},
-	54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
-	55: {"▐⠂       ▌", "▐⠈       ▌", "▐ ⠂      ▌", "▐ ⠠      ▌", "▐  ⡀     ▌", "▐  ⠠     ▌", "▐   ⠂    ▌", "▐   ⠈    ▌", "▐    ⠂   ▌", "▐    ⠠   ▌", "▐     ⡀  ▌", "▐     ⠠  ▌", "▐      ⠂ ▌", "▐      ⠈ ▌", "▐       ⠂▌", "▐       ⠠▌", "▐       ⡀▌", "▐      ⠠ ▌", "▐      ⠂ ▌", "▐     ⠈  ▌", "▐     ⠂  ▌", "▐    ⠠   ▌", "▐    ⡀   ▌", "▐   ⠠    ▌", "▐   ⠂    ▌", "▐  ⠈     ▌", "▐  ⠂     ▌", "▐ ⠠      ▌", "▐ ⡀      ▌", "▐⠠       ▌"},
-	56: {"¿", "?"},
-	57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
-	58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
-	59: {".  ", ".. ", "...", " ..", "  .", "   "},
-	60: {".", "o", "O", "°", "O", "o", "."},
-	61: {"▓", "▒", "░"},
-	62: {"▌", "▀", "▐", "▄"},
-	63: {"⊶", "⊷"},
-	64: {"▪", "▫"},
-	65: {"□", "■"},
-	66: {"▮", "▯"},
-	67: {"-", "=", "≡"},
-	68: {"d", "q", "p", "b"},
-	69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
-	70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
-	71: {"☗", "☖"},
-	72: {"⧇", "⧆"},
-	73: {"◉", "◎"},
-	74: {"㊂", "㊀", "㊁"},
-	75: {"⦾", "⦿"},
-}
--- a/scripts/build_docker.sh
+++ b/scripts/build_docker.sh
@@ -10,6 +10,8 @@ docker buildx build \
    --platform=linux/arm64,linux/amd64 \
    --build-arg=VERSION \
    --build-arg=GOFLAGS \
+    --cache-from type=local,src=.cache \
+    --cache-to type=local,dest=.cache \
    -f Dockerfile \
    -t ollama \
    .
--- a/scripts/push_docker.sh
+++ b/scripts/push_docker.sh
@@ -10,6 +10,7 @@ docker buildx build \
    --platform=linux/arm64,linux/amd64 \
    --build-arg=VERSION \
    --build-arg=GOFLAGS \
+    --cache-from type=local,src=.cache \
    -f Dockerfile \
    -t ollama/ollama -t ollama/ollama:$VERSION \
    .
--- a/server/download.go
+++ b/server/download.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"math"
 	"net/http"
 	"net/url"
 	"os"
@@ -53,8 +54,8 @@ type blobDownloadPart struct {

 const (
 	numDownloadParts          = 64
-	minDownloadPartSize int64 = 32 * 1000 * 1000
-	maxDownloadPartSize int64 = 256 * 1000 * 1000
+	minDownloadPartSize int64 = 100 * format.MegaByte
+	maxDownloadPartSize int64 = 1000 * format.MegaByte
 )

 func (p *blobDownloadPart) Name() string {
@@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 			continue
 		}

-		i := i
 		g.Go(func() error {
 			var err error
 			for try := 0; try < maxRetries; try++ {
@@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 					// return immediately if the context is canceled or the device is out of space
 					return err
 				case err != nil:
-					log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err)
+					sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
+					log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
+					time.Sleep(sleep)
 					continue
 				default:
-					if try > 0 {
-						log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
-					}
 					return nil
 				}
 			}
@@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 		}

 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("downloading %s", b.Digest),
+			Status:    fmt.Sprintf("pulling %s", b.Digest[7:19]),
 			Digest:    b.Digest,
 			Total:     b.Total,
 			Completed: b.Completed.Load(),
@@ -304,7 +303,7 @@ type downloadOpts struct {
 	fn      func(api.ProgressResponse)
 }

-const maxRetries = 3
+const maxRetries = 6

 var errMaxRetriesExceeded = errors.New("max retries exceeded")

@@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
 		return err
 	default:
 		opts.fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("downloading %s", opts.digest),
+			Status:    fmt.Sprintf("pulling %s", opts.digest[7:19]),
 			Digest:    opts.digest,
 			Total:     fi.Size(),
 			Completed: fi.Size(),
--- a/server/images.go
+++ b/server/images.go
@@ -228,26 +228,6 @@ func GetModel(name string) (*Model, error) {
 	return model, nil
 }

-func filenameWithPath(path, f string) (string, error) {
-	// if filePath starts with ~/, replace it with the user's home directory.
-	if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) {
-		parts := strings.Split(f, string(os.PathSeparator))
-		home, err := os.UserHomeDir()
-		if err != nil {
-			return "", fmt.Errorf("failed to open file: %v", err)
-		}
-
-		f = filepath.Join(home, filepath.Join(parts[1:]...))
-	}
-
-	// if filePath is not an absolute path, make it relative to the modelfile path
-	if !filepath.IsAbs(f) {
-		f = filepath.Join(filepath.Dir(path), f)
-	}
-
-	return f, nil
-}
-
 func realpath(p string) string {
 	abspath, err := filepath.Abs(p)
 	if err != nil {
@@ -1146,43 +1126,49 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 var errUnauthorized = fmt.Errorf("unauthorized")

 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
-	lastErr := errMaxRetriesExceeded
-	for try := 0; try < maxRetries; try++ {
-		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
-		if err != nil {
-			log.Printf("couldn't start upload: %v", err)
-			return nil, err
+	resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
+	if err != nil {
+		if !errors.Is(err, context.Canceled) {
+			log.Printf("request failed: %v", err)
 		}

-		switch {
-		case resp.StatusCode == http.StatusUnauthorized:
-			auth := resp.Header.Get("www-authenticate")
-			authRedir := ParseAuthRedirectString(auth)
-			token, err := getAuthToken(ctx, authRedir)
+		return nil, err
+	}
+
+	switch {
+	case resp.StatusCode == http.StatusUnauthorized:
+		// Handle authentication error with one retry
+		auth := resp.Header.Get("www-authenticate")
+		authRedir := ParseAuthRedirectString(auth)
+		token, err := getAuthToken(ctx, authRedir)
+		if err != nil {
+			return nil, err
+		}
+		regOpts.Token = token
+		if body != nil {
+			_, err = body.Seek(0, io.SeekStart)
 			if err != nil {
 				return nil, err
 			}
-
-			regOpts.Token = token
-			if body != nil {
-				body.Seek(0, io.SeekStart)
-			}
-			lastErr = errUnauthorized
-		case resp.StatusCode == http.StatusNotFound:
-			return nil, os.ErrNotExist
-		case resp.StatusCode >= http.StatusBadRequest:
-			body, err := io.ReadAll(resp.Body)
-			if err != nil {
-				return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
-			}
-
-			return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
-		default:
-			return resp, nil
 		}
+
+		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
+		if resp.StatusCode == http.StatusUnauthorized {
+			return nil, errUnauthorized
+		}
+
+		return resp, err
+	case resp.StatusCode == http.StatusNotFound:
+		return nil, os.ErrNotExist
+	case resp.StatusCode >= http.StatusBadRequest:
+		responseBody, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
+		}
+		return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
 	}

-	return nil, lastErr
+	return resp, nil
 }

 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
--- a/server/routes.go
+++ b/server/routes.go
@@ -666,8 +666,14 @@ func HeadBlobHandler(c *gin.Context) {
 }

 func CreateBlobHandler(c *gin.Context) {
+	targetPath, err := GetBlobsPath(c.Param("digest"))
+	if err != nil {
+		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
 	hash := sha256.New()
-	temp, err := os.CreateTemp("", c.Param("digest"))
+	temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -690,12 +696,6 @@ func CreateBlobHandler(c *gin.Context) {
 		return
 	}

-	targetPath, err := GetBlobsPath(c.Param("digest"))
-	if err != nil {
-		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
 	if err := os.Rename(temp.Name(), targetPath); err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -794,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 	if runtime.GOOS == "linux" {
 		// check compatibility to log warnings
 		if _, err := llm.CheckVRAM(); err != nil {
-			log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
+			log.Printf(err.Error())
 		}
 	}

--- a/server/upload.go
+++ b/server/upload.go
@@ -5,9 +5,9 @@ import (
 	"crypto/md5"
 	"errors"
 	"fmt"
-	"hash"
 	"io"
 	"log"
+	"math"
 	"net/http"
 	"net/url"
 	"os"
@@ -35,6 +35,8 @@ type blobUpload struct {

 	context.CancelFunc

+	file *os.File
+
 	done       bool
 	err        error
 	references atomic.Int32
@@ -42,8 +44,8 @@ type blobUpload struct {

 const (
 	numUploadParts          = 64
-	minUploadPartSize int64 = 95 * 1000 * 1000
-	maxUploadPartSize int64 = 1000 * 1000 * 1000
+	minUploadPartSize int64 = 100 * format.MegaByte
+	maxUploadPartSize int64 = 1000 * format.MegaByte
 )

 func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@@ -128,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 		return
 	}

-	f, err := os.Open(p)
+	b.file, err = os.Open(p)
 	if err != nil {
 		b.err = err
 		return
 	}
-	defer f.Close()
+	defer b.file.Close()

 	g, inner := errgroup.WithContext(ctx)
 	g.SetLimit(numUploadParts)
@@ -145,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 			g.Go(func() error {
 				var err error
 				for try := 0; try < maxRetries; try++ {
-					part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
 					err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
 					switch {
 					case errors.Is(err, context.Canceled):
@@ -153,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 					case errors.Is(err, errMaxRetriesExceeded):
 						return err
 					case err != nil:
-						log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
+						part.Reset()
+						sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
+						log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
+						time.Sleep(sleep)
 						continue
 					}

@@ -173,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 	requestURL := <-b.nextURL

 	var sb strings.Builder
+
+	// calculate md5 checksum and add it to the commit request
 	for _, part := range b.Parts {
-		sb.Write(part.Sum(nil))
+		hash := md5.New()
+		if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
+			b.err = err
+			return
+		}
+
+		sb.Write(hash.Sum(nil))
 	}

 	md5sum := md5.Sum([]byte(sb.String()))
@@ -188,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", "0")

-	resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
-	if err != nil {
-		b.err = err
+	for try := 0; try < maxRetries; try++ {
+		resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
+		if err != nil {
+			b.err = err
+			if errors.Is(err, context.Canceled) {
+				return
+			}
+
+			sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
+			log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
+			time.Sleep(sleep)
+			continue
+		}
+		defer resp.Body.Close()
+
+		b.err = nil
+		b.done = true
 		return
 	}
-	defer resp.Body.Close()
-
-	b.done = true
 }

 func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
-	part.Reset()
-
 	headers := make(http.Header)
 	headers.Set("Content-Type", "application/octet-stream")
 	headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
-	headers.Set("X-Redirect-Uploads", "1")

 	if method == http.MethodPatch {
+		headers.Set("X-Redirect-Uploads", "1")
 		headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
 	}

-	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts)
+	sr := io.NewSectionReader(b.file, part.Offset, part.Size)
+	resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
 	if err != nil {
 		return err
 	}
@@ -235,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 			return err
 		}

+		// retry uploading to the redirect URL
 		for try := 0; try < maxRetries; try++ {
 			err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
 			switch {
@@ -243,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
 			case errors.Is(err, errMaxRetriesExceeded):
 				return err
 			case err != nil:
-				log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
+				part.Reset()
+				sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
+				log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
+				time.Sleep(sleep)
 				continue
 			}

@@ -301,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
 		}

 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("uploading %s", b.Digest),
+			Status:    fmt.Sprintf("pushing %s", b.Digest[7:19]),
 			Digest:    b.Digest,
 			Total:     b.Total,
 			Completed: b.Completed.Load(),
@@ -315,14 +341,10 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er

 type blobUploadPart struct {
 	// N is the part number
-	N      int
-	Offset int64
-	Size   int64
-	hash.Hash
-
+	N       int
+	Offset  int64
+	Size    int64
 	written int64
-
-	io.ReadSeeker
 	*blobUpload
 }

@@ -334,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
 }

 func (p *blobUploadPart) Reset() {
-	p.Seek(0, io.SeekStart)
 	p.Completed.Add(-int64(p.written))
 	p.written = 0
-	p.Hash = md5.New()
 }

 func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@@ -352,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
 	default:
 		defer resp.Body.Close()
 		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("uploading %s", layer.Digest),
+			Status:    fmt.Sprintf("pushing %s", layer.Digest[7:19]),
 			Digest:    layer.Digest,
 			Total:     layer.Size,
 			Completed: layer.Size,
Author	SHA1	Message	Date
Patrick Devine	7550fd1b7f	use a pulsating spinner	2023-11-20 17:27:53 -08:00
Jeffrey Morgan	a3fcecf943	only set `main_gpu` if value > 0 is provided	2023-11-20 19:54:04 -05:00
Jeffrey Morgan	df07e4a097	remove redundant filename parameter (#1213)	2023-11-20 17:05:36 -05:00
Michael Yang	0b7ade0d4c	Merge pull request #1212 from jmorganca/mxyng/metal — enable metal for fp32, q5_0, q5_1	2023-11-20 13:56:39 -08:00
Michael Yang	19b7a4d715	recent llama.cpp update added kernels for fp32, q5_0, and q5_1	2023-11-20 13:44:31 -08:00
Bruce MacDonald	31ab453d37	resolve FROM path before sending modelfile (#1211)	2023-11-20 16:43:48 -05:00
Jeffrey Morgan	35c4b5ec16	calculate hash separately from http request	2023-11-20 15:45:11 -05:00
James Braza	f24741ff39	Documenting how to view `Modelfile`s (#723) — Documented viewing Modelfiles in ollama.ai/library; Moved Modelfile in ollama.ai down per request	2023-11-20 15:24:29 -05:00
Jeffrey Morgan	8c4022b06b	fix initial progress stats	2023-11-20 14:33:46 -05:00
Jeffrey Morgan	433702f421	hide progress stats on completion	2023-11-20 14:22:39 -05:00
Jeffrey Morgan	6066c70edd	restore progress messages for older endpoints	2023-11-20 11:37:17 -05:00
Jeffrey Morgan	f10ac5de19	restore stats updated every second to progress bar	2023-11-20 10:58:19 -05:00
Jeffrey Morgan	93a108214c	only show decimal points for smaller file size numbers	2023-11-20 10:58:19 -05:00
Purinda Gunasekara	be61a81758	main-gpu argument is not getting passed to llamacpp, fixed. (#1192)	2023-11-20 10:52:52 -05:00
Toni Soriano	2fdf1b5ff8	add laravel package to README.md (#1208) — Co-authored-by: Toni <cloudstudio@Tonis-Mac-mini.local>	2023-11-20 10:48:35 -05:00
Huy Le	331068b964	Adding `ogpt.nvim` into the list of plugins! (#1190) — adding ollama.nvim for visibility; adding an ogpt.nvim neovim plugin	2023-11-20 10:39:14 -05:00
Andy Brenneke	0179d8eb6b	Add Rivet to Community Integrations (#1183)	2023-11-20 10:36:47 -05:00
Eli Bendersky	be48741308	README: link to LangChainGo for talking to ollama, with an example (#1206)	2023-11-20 10:35:07 -05:00
Jeffrey Morgan	6bbd6e26fb	fix temporary newline created and removed with spinner in `ollama run`	2023-11-20 00:49:08 -05:00
Jeffrey Morgan	e6ad4813d3	dont crash when redirecting stderr	2023-11-19 23:50:45 -05:00
Jeffrey Morgan	13ba6df5ab	enable cpu instructions on intel macs	2023-11-19 23:20:26 -05:00
Jeffrey Morgan	9d73d3a6b5	add back `part.Reset()`	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	72cd336410	dont retry on upload complete context cancel	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	1bd594b2fa	revert to using one open file for blob uploads	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	9a8c21ac3d	use exponential everywhere	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	f6b317e8c9	fix sending too little data in chunk upload body	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	ac5076ce1e	exponential backoff up to 30s	2023-11-19 14:32:19 -05:00
Michael Yang	42c2e3a624	upload: retry complete upload	2023-11-19 14:32:19 -05:00
Michael Yang	cb42589792	adjust download/upload parts	2023-11-19 14:32:19 -05:00
Jeffrey Morgan	258addc799	fix comment in `progress.go`	2023-11-19 13:46:19 -05:00
Jeffrey Morgan	c06b9b7304	update progress rendering to be closer to `v0.1.10`	2023-11-19 13:43:21 -05:00
Jeffrey Morgan	95b9acd324	improve pull percentage rendering	2023-11-19 11:00:43 -05:00
Jeffrey Morgan	04cbf5ccc0	progress bar styling improvements	2023-11-19 09:54:33 -05:00
Jeffrey Morgan	e1d7056496	update progress statuses	2023-11-19 09:21:13 -05:00
Jeffrey Morgan	02524a56ff	check retry for authorization error	2023-11-19 00:19:53 -05:00
Jeffrey Morgan	1657c6abc7	add note to specify JSON in the prompt when using JSON mode	2023-11-18 22:59:26 -05:00
Jeffrey Morgan	12e046f12a	remove unused function	2023-11-18 22:16:51 -05:00
Jeffrey Morgan	36a3bbf65f	Update llm/llama.go	2023-11-18 21:25:07 -05:00
Bruce MacDonald	43a726149d	fix potentially inaccurate error message	2023-11-18 21:25:07 -05:00
Jeffrey Morgan	984714f131	update status text when transfering blob on `ollama create`	2023-11-18 09:40:10 -05:00
Jeffrey Morgan	bab9494176	add `-` separator to temp file created on `ollama create`	2023-11-18 09:39:52 -05:00
Jeffrey Morgan	85e4441c6a	cache docker builds	2023-11-18 08:51:38 -05:00
Michael Yang	42e43736a4	Merge pull request #1186 from jmorganca/mxyng/copy-blob — fix cross device rename	2023-11-17 21:54:53 -08:00
Michael Yang	c6e6c8ee7e	fix cross device rename	2023-11-17 15:22:17 -08:00
Jeffrey Morgan	a185b29719	fix install script error on linux	2023-11-17 18:00:41 -05:00
Michael Yang	dc84b20d6b	Merge pull request #1104 from jmorganca/mxyng/jupyter — add jupyter notebook example	2023-11-17 14:46:26 -08:00
Michael Yang	ad8659b980	Merge pull request #1161 from jmorganca/mxyng/systemd-placeholder — placeholder environment variables	2023-11-17 14:45:38 -08:00
Michael Yang	c1bbf5ddee	Merge pull request #1134 from jmorganca/mxyng/progress — progress bar	2023-11-17 14:03:35 -08:00
Bruce MacDonald	0b19e24d81	only retry once on auth failure (#1175)	2023-11-17 14:22:35 -05:00
Michael Yang	3cb07d2773	simplify StopAndClear	2023-11-17 10:26:22 -08:00
Michael Yang	976068369b	stop all spinners on progress stop	2023-11-17 10:06:19 -08:00
Michael Yang	4d677ee389	no divide by zero	2023-11-17 10:06:19 -08:00
Michael Yang	7ea905871a	only move cursor up if pos > 0	2023-11-17 10:06:19 -08:00
Michael Yang	d6ecaa2cbf	update progress responses	2023-11-17 10:06:19 -08:00
Michael Yang	4dcf7a59b1	generate progress	2023-11-17 10:06:19 -08:00
Michael Yang	1c0e092ead	progress cmd	2023-11-17 10:06:19 -08:00
Michael Yang	c4a3ccd7ac	progress	2023-11-17 10:06:19 -08:00
Michael Yang	9f04e5a8ea	format bytes	2023-11-17 10:06:19 -08:00
Michael Yang	f91bb2f7f0	remove progressbar	2023-11-17 10:06:19 -08:00
Michael Yang	0813387414	Merge pull request #1177 from jmorganca/mxyng/faq — faq: fix heading and add more details	2023-11-17 10:05:21 -08:00
Michael Yang	4936b5bb37	add jupyter readme	2023-11-17 10:04:52 -08:00
Michael Yang	f7f6d6c693	Update examples/jupyter-notebook/ollama.ipynb — Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>	2023-11-17 09:30:07 -08:00
Michael Yang	a3053b66d2	add jupyter notebook example	2023-11-17 09:30:07 -08:00
Michael Yang	c82ead4d01	faq: fix heading and add more details	2023-11-17 09:02:17 -08:00
Michael Yang	90860b6a7e	update faq (#1176)	2023-11-17 11:42:58 -05:00
Jeffrey Morgan	81092147c4	remove unnecessary `-X POST` from example `curl` commands	2023-11-17 09:50:38 -05:00
Jeffrey Morgan	92656a74b7	Use `llama2` as the model in `api.md`	2023-11-17 07:17:51 -05:00
Michael Yang	32add8577d	placeholder environment variables	2023-11-16 16:57:39 -08:00