Compare commits

...

68 Commits

Author SHA1 Message Date
Patrick Devine
7550fd1b7f use a pulsating spinner 2023-11-20 17:27:53 -08:00
Jeffrey Morgan
a3fcecf943 only set main_gpu if value > 0 is provided 2023-11-20 19:54:04 -05:00
Jeffrey Morgan
df07e4a097 remove redundant filename parameter (#1213) 2023-11-20 17:05:36 -05:00
Michael Yang
0b7ade0d4c Merge pull request #1212 from jmorganca/mxyng/metal
enable metal for fp32, q5_0, q5_1
2023-11-20 13:56:39 -08:00
Michael Yang
19b7a4d715 recent llama.cpp update added kernels for fp32, q5_0, and q5_1 2023-11-20 13:44:31 -08:00
Bruce MacDonald
31ab453d37 resolve FROM path before sending modelfile (#1211) 2023-11-20 16:43:48 -05:00
Jeffrey Morgan
35c4b5ec16 calculate hash separately from http request 2023-11-20 15:45:11 -05:00
James Braza
f24741ff39 Documenting how to view Modelfiles (#723)
* Documented viewing Modelfiles in ollama.ai/library

* Moved Modelfile in ollama.ai down per request
2023-11-20 15:24:29 -05:00
Jeffrey Morgan
8c4022b06b fix initial progress stats 2023-11-20 14:33:46 -05:00
Jeffrey Morgan
433702f421 hide progress stats on completion 2023-11-20 14:22:39 -05:00
Jeffrey Morgan
6066c70edd restore progress messages for older endpoints 2023-11-20 11:37:17 -05:00
Jeffrey Morgan
f10ac5de19 restore stats updated every second to progress bar 2023-11-20 10:58:19 -05:00
Jeffrey Morgan
93a108214c only show decimal points for smaller file size numbers 2023-11-20 10:58:19 -05:00
Purinda Gunasekara
be61a81758 main-gpu argument is not getting passed to llamacpp, fixed. (#1192) 2023-11-20 10:52:52 -05:00
Toni Soriano
2fdf1b5ff8 add laravel package to README.md (#1208)
Co-authored-by: Toni <cloudstudio@Tonis-Mac-mini.local>
2023-11-20 10:48:35 -05:00
Huy Le
331068b964 Adding ogpt.nvim into the list of plugins! (#1190)
* adding ollama.nvim for visibility

* adding an ogpt.nvim neovim plugin
2023-11-20 10:39:14 -05:00
Andy Brenneke
0179d8eb6b Add Rivet to Community Integrations (#1183) 2023-11-20 10:36:47 -05:00
Eli Bendersky
be48741308 README: link to LangChainGo for talking to ollama, with an example (#1206) 2023-11-20 10:35:07 -05:00
Jeffrey Morgan
6bbd6e26fb fix temporary newline created and removed with spinner in ollama run 2023-11-20 00:49:08 -05:00
Jeffrey Morgan
e6ad4813d3 dont crash when redirecting stderr 2023-11-19 23:50:45 -05:00
Jeffrey Morgan
13ba6df5ab enable cpu instructions on intel macs 2023-11-19 23:20:26 -05:00
Jeffrey Morgan
9d73d3a6b5 add back part.Reset() 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
72cd336410 dont retry on upload complete context cancel 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
1bd594b2fa revert to using one open file for blob uploads 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
9a8c21ac3d use exponential everywhere 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
f6b317e8c9 fix sending too little data in chunk upload body 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
ac5076ce1e exponential backoff up to 30s 2023-11-19 14:32:19 -05:00
Michael Yang
42c2e3a624 upload: retry complete upload 2023-11-19 14:32:19 -05:00
Michael Yang
cb42589792 adjust download/upload parts 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
258addc799 fix comment in progress.go 2023-11-19 13:46:19 -05:00
Jeffrey Morgan
c06b9b7304 update progress rendering to be closer to v0.1.10 2023-11-19 13:43:21 -05:00
Jeffrey Morgan
95b9acd324 improve pull percentage rendering 2023-11-19 11:00:43 -05:00
Jeffrey Morgan
04cbf5ccc0 progress bar styling improvements 2023-11-19 09:54:33 -05:00
Jeffrey Morgan
e1d7056496 update progress statuses 2023-11-19 09:21:13 -05:00
Jeffrey Morgan
02524a56ff check retry for authorization error 2023-11-19 00:19:53 -05:00
Jeffrey Morgan
1657c6abc7 add note to specify JSON in the prompt when using JSON mode 2023-11-18 22:59:26 -05:00
Jeffrey Morgan
12e046f12a remove unused function 2023-11-18 22:16:51 -05:00
Jeffrey Morgan
36a3bbf65f Update llm/llama.go 2023-11-18 21:25:07 -05:00
Bruce MacDonald
43a726149d fix potentially inaccurate error message 2023-11-18 21:25:07 -05:00
Jeffrey Morgan
984714f131 update status text when transfering blob on ollama create 2023-11-18 09:40:10 -05:00
Jeffrey Morgan
bab9494176 add - separator to temp file created on ollama create 2023-11-18 09:39:52 -05:00
Jeffrey Morgan
85e4441c6a cache docker builds 2023-11-18 08:51:38 -05:00
Michael Yang
42e43736a4 Merge pull request #1186 from jmorganca/mxyng/copy-blob
fix cross device rename
2023-11-17 21:54:53 -08:00
Michael Yang
c6e6c8ee7e fix cross device rename 2023-11-17 15:22:17 -08:00
Jeffrey Morgan
a185b29719 fix install script error on linux 2023-11-17 18:00:41 -05:00
Michael Yang
dc84b20d6b Merge pull request #1104 from jmorganca/mxyng/jupyter
add jupyter notebook example
2023-11-17 14:46:26 -08:00
Michael Yang
ad8659b980 Merge pull request #1161 from jmorganca/mxyng/systemd-placeholder
placeholder environment variables
2023-11-17 14:45:38 -08:00
Michael Yang
c1bbf5ddee Merge pull request #1134 from jmorganca/mxyng/progress
progress bar
2023-11-17 14:03:35 -08:00
Bruce MacDonald
0b19e24d81 only retry once on auth failure (#1175) 2023-11-17 14:22:35 -05:00
Michael Yang
3cb07d2773 simplify StopAndClear 2023-11-17 10:26:22 -08:00
Michael Yang
976068369b stop all spinners on progress stop 2023-11-17 10:06:19 -08:00
Michael Yang
4d677ee389 no divide by zero 2023-11-17 10:06:19 -08:00
Michael Yang
7ea905871a only move cursor up if pos > 0 2023-11-17 10:06:19 -08:00
Michael Yang
d6ecaa2cbf update progress responses 2023-11-17 10:06:19 -08:00
Michael Yang
4dcf7a59b1 generate progress 2023-11-17 10:06:19 -08:00
Michael Yang
1c0e092ead progress cmd 2023-11-17 10:06:19 -08:00
Michael Yang
c4a3ccd7ac progress 2023-11-17 10:06:19 -08:00
Michael Yang
9f04e5a8ea format bytes 2023-11-17 10:06:19 -08:00
Michael Yang
f91bb2f7f0 remove progressbar 2023-11-17 10:06:19 -08:00
Michael Yang
0813387414 Merge pull request #1177 from jmorganca/mxyng/faq
faq: fix heading and add more details
2023-11-17 10:05:21 -08:00
Michael Yang
4936b5bb37 add jupyter readme 2023-11-17 10:04:52 -08:00
Michael Yang
f7f6d6c693 Update examples/jupyter-notebook/ollama.ipynb
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-17 09:30:07 -08:00
Michael Yang
a3053b66d2 add jupyter notebook example 2023-11-17 09:30:07 -08:00
Michael Yang
c82ead4d01 faq: fix heading and add more details 2023-11-17 09:02:17 -08:00
Michael Yang
90860b6a7e update faq (#1176) 2023-11-17 11:42:58 -05:00
Jeffrey Morgan
81092147c4 remove unnecessary -X POST from example curl commands 2023-11-17 09:50:38 -05:00
Jeffrey Morgan
92656a74b7 Use llama2 as the model in api.md 2023-11-17 07:17:51 -05:00
Michael Yang
32add8577d placeholder environment variables 2023-11-16 16:57:39 -08:00
28 changed files with 766 additions and 1577 deletions

View File

@@ -6,3 +6,4 @@ scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env
.cache

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@
dist
ollama
ggml-metal.metal
.cache

View File

@@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models.
For example, to generate text from a model:
```
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt":"Why is the sky blue?"
}'
@@ -234,11 +234,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -247,6 +249,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
### Mobile
@@ -262,3 +265,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)

View File

@@ -30,7 +30,7 @@ import (
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/progressbar"
"github.com/jmorganca/ollama/progress"
"github.com/jmorganca/ollama/readline"
"github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version"
@@ -48,14 +48,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
modelfile, err := os.ReadFile(filename)
if err != nil {
return err
}
spinner := NewSpinner("transferring context")
go spinner.Spin(100 * time.Millisecond)
commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil {
return err
@@ -66,6 +68,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for _, c := range commands {
switch c.Name {
case "model", "adapter":
@@ -76,6 +82,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = filepath.Join(home, path[2:])
}
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
bin, err := os.Open(path)
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue
@@ -99,41 +109,34 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
}
var currentDigest string
var bar *progressbar.ProgressBar
request := api.CreateRequest{Name: args[0], Path: filename, Modelfile: string(modelfile)}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
if resp.Digest != "" {
spinner.Stop()
currentDigest = resp.Digest
// pulling
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
spinner.Stop()
spinner = NewSpinner(resp.Status)
go spinner.Spin(100 * time.Millisecond)
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(context.Background(), &request, fn); err != nil {
return err
}
spinner.Stop()
if spinner.description != "success" {
return errors.New("unexpected end to create model")
}
return nil
}
@@ -170,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PushRequest{Name: args[0], Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to push model")
}
spinner.Stop()
return nil
}
@@ -350,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
return err
}
return pull(args[0], insecure)
}
func pull(model string, insecure bool) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PullRequest{Name: model, Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to pull model")
}
return nil
}
@@ -442,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
return err
}
spinner := NewSpinner("")
go spinner.Spin(60 * time.Millisecond)
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse
@@ -475,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
}
p.StopAndClear()
latest = response
@@ -511,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err

View File

@@ -1,44 +0,0 @@
package cmd
import (
"fmt"
"os"
"time"
"github.com/jmorganca/ollama/progressbar"
)
type Spinner struct {
description string
*progressbar.ProgressBar
}
func NewSpinner(description string) *Spinner {
return &Spinner{
description: description,
ProgressBar: progressbar.NewOptions(-1,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionThrottle(60*time.Millisecond),
progressbar.OptionSpinnerType(14),
progressbar.OptionSetRenderBlankState(true),
progressbar.OptionSetElapsedTime(false),
progressbar.OptionClearOnFinish(),
progressbar.OptionSetDescription(description),
),
}
}
func (s *Spinner) Spin(tick time.Duration) {
for range time.Tick(tick) {
if s.IsFinished() {
break
}
s.Add(1)
}
}
func (s *Spinner) Stop() {
s.Finish()
fmt.Println(s.description)
}

View File

@@ -51,14 +51,16 @@ Advanced parameters (optional):
### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?"
}'
@@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s),
#### Request (No streaming)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false
}'
@@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
@@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]",
"raw": true,
@@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
#### Request (JSON mode)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
@@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to:
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false,
"options": {
@@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
@@ -297,19 +299,18 @@ Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `m
### Parameters
- `name`: name of the model to create
- `path`: path to the Modelfile (deprecated: please use modelfile instead)
- `modelfile`: contents of the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `path` (deprecated): path to the Modelfile
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/create -d '{
curl http://localhost:11434/api/create -d '{
"name": "mario",
"path": "~/Modelfile",
"modelfile": "FROM llama2"
"modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
}'
```
@@ -395,7 +396,7 @@ A single JSON object will be returned.
{
"models": [
{
"name": "llama2:7b",
"name": "llama2",
"modified_at": "2023-08-02T17:02:23.713454393-07:00",
"size": 3791730596
},
@@ -426,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a
```shell
curl http://localhost:11434/api/show -d '{
"name": "llama2:7b"
"name": "llama2"
}'
```
@@ -455,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model.
```shell
curl http://localhost:11434/api/copy -d '{
"source": "llama2:7b",
"source": "llama2",
"destination": "llama2-backup"
}'
```
@@ -509,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
#### Request
```shell
curl -X POST http://localhost:11434/api/pull -d '{
"name": "llama2:7b"
curl http://localhost:11434/api/pull -d '{
"name": "llama2"
}'
```
@@ -581,7 +582,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
#### Request
```shell
curl -X POST http://localhost:11434/api/push -d '{
curl http://localhost:11434/api/push -d '{
"name": "mattw/pygmalion:latest"
}'
```
@@ -649,8 +650,8 @@ Advanced parameters:
#### Request
```shell
curl -X POST http://localhost:11434/api/embeddings -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/embeddings -d '{
"model": "llama2",
"prompt": "Here is an article about llamas..."
}'
```

View File

@@ -148,3 +148,9 @@ Build and run this image:
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.

View File

@@ -41,6 +41,8 @@ INSTRUCTION arguments
## Examples
### Basic `Modelfile`
An example of a `Modelfile` creating a mario blueprint:
```modelfile
@@ -63,6 +65,35 @@ To use this:
More examples are available in the [examples directory](../examples).
### `Modelfile`s in [ollama.ai/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` like so:
```bash
> ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama2:13b
FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
{{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
PARAMETER stop [INST]
PARAMETER stop [/INST]
PARAMETER stop <<SYS>>
PARAMETER stop <</SYS>>
```
## Instructions
### FROM (Required)
@@ -177,3 +208,5 @@ LICENSE """
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
[1]: https://ollama.ai/library

View File

@@ -0,0 +1,5 @@
# Ollama Jupyter Notebook
This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
For best results, use an instance with GPU accelerator.

View File

@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "93f59dcb-c588-41b8-a792-55d88ade739c",
"metadata": {},
"outputs": [],
"source": [
"# Download and run the Ollama Linux install script\n",
"!curl https://ollama.ai/install.sh | sh\n",
"!command -v systemctl >/dev/null && sudo systemctl stop ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658c147e-c7f8-490e-910e-62b80f577dda",
"metadata": {},
"outputs": [],
"source": [
"!pip install aiohttp pyngrok\n",
"\n",
"import os\n",
"import asyncio\n",
"from aiohttp import ClientSession\n",
"\n",
"# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
"# over the built-in library. This is particularly important for \n",
"# Google Colab which installs older drivers\n",
"os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
"\n",
"async def run(cmd):\n",
" '''\n",
" run is a helper function to run subcommands asynchronously.\n",
" '''\n",
" print('>>> starting', *cmd)\n",
" p = await asyncio.subprocess.create_subprocess_exec(\n",
" *cmd,\n",
" stdout=asyncio.subprocess.PIPE,\n",
" stderr=asyncio.subprocess.PIPE,\n",
" )\n",
"\n",
" async def pipe(lines):\n",
" async for line in lines:\n",
" print(line.strip().decode('utf-8'))\n",
"\n",
" await asyncio.gather(\n",
" pipe(p.stdout),\n",
" pipe(p.stderr),\n",
" )\n",
"\n",
"\n",
"await asyncio.gather(\n",
" run(['ollama', 'serve']),\n",
" run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
"metadata": {},
"source": [
"The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
"\n",
"```\n",
"t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
"```\n",
"\n",
"The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
"\n",
"```bash\n",
"export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
"ollama list\n",
"ollama run mistral\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,23 +1,45 @@
package format
import "fmt"
import (
"fmt"
"math"
)
const (
Byte = 1
KiloByte = Byte * 1000
MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000
TeraByte = GigaByte * 1000
)
func HumanBytes(b int64) string {
var value float64
var unit string
switch {
case b > GigaByte:
return fmt.Sprintf("%.1f GB", float64(b)/GigaByte)
case b > MegaByte:
return fmt.Sprintf("%.1f MB", float64(b)/MegaByte)
case b > KiloByte:
return fmt.Sprintf("%.1f KB", float64(b)/KiloByte)
case b >= TeraByte:
value = float64(b) / TeraByte
unit = "TB"
case b >= GigaByte:
value = float64(b) / GigaByte
unit = "GB"
case b >= MegaByte:
value = float64(b) / MegaByte
unit = "MB"
case b >= KiloByte:
value = float64(b) / KiloByte
unit = "KB"
default:
return fmt.Sprintf("%d B", b)
}
switch {
case value >= 100:
return fmt.Sprintf("%d %s", int(value), unit)
case value != math.Trunc(value):
return fmt.Sprintf("%.1f %s", value, unit)
default:
return fmt.Sprintf("%d %s", int(value), unit)
}
}

View File

@@ -7,13 +7,13 @@ package llm
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate git -C gguf apply ../patches/0001-metal-handle-ggml_scale-for-n-4-0-close-3754.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -226,7 +226,7 @@ type llama struct {
}
var (
errNvidiaSMI = errors.New("nvidia-smi command failed")
errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
)
@@ -343,6 +343,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--embedding",
}
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
}
@@ -544,6 +548,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stream": true,
"n_predict": llm.NumPredict,
"n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature,
"top_k": llm.TopK,
"top_p": llm.TopP,

View File

@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
if runtime.GOOS == "darwin" {
switch ggml.FileType() {
case "Q8_0":
case "F32", "Q5_0", "Q5_1", "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
}
var requiredMemory int64

14
main.go
View File

@@ -2,11 +2,25 @@ package main
import (
"context"
"fmt"
"os"
"os/signal"
"syscall"
"github.com/jmorganca/ollama/cmd"
"github.com/spf13/cobra"
)
func main() {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
fmt.Print("\033[?25h")
os.Exit(0)
}()
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
}

155
progress/bar.go Normal file
View File

@@ -0,0 +1,155 @@
package progress
import (
"fmt"
"math"
"os"
"strings"
"time"
"github.com/jmorganca/ollama/format"
"golang.org/x/term"
)
type Stats struct {
rate int64
value int64
remaining time.Duration
}
type Bar struct {
message string
messageWidth int
maxValue int64
initialValue int64
currentValue int64
started time.Time
stats Stats
statted time.Time
}
func NewBar(message string, maxValue, initialValue int64) *Bar {
return &Bar{
message: message,
messageWidth: -1,
maxValue: maxValue,
initialValue: initialValue,
currentValue: initialValue,
started: time.Now(),
}
}
func (b *Bar) String() string {
termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termWidth = 80
}
var pre, mid, suf strings.Builder
if b.message != "" {
message := strings.TrimSpace(b.message)
if b.messageWidth > 0 && len(message) > b.messageWidth {
message = message[:b.messageWidth]
}
fmt.Fprintf(&pre, "%s", message)
if b.messageWidth-pre.Len() >= 0 {
pre.WriteString(strings.Repeat(" ", b.messageWidth-pre.Len()))
}
pre.WriteString(" ")
}
fmt.Fprintf(&pre, "%3.0f%% ", math.Floor(b.percent()))
fmt.Fprintf(&suf, "(%s/%s", format.HumanBytes(b.currentValue), format.HumanBytes(b.maxValue))
stats := b.Stats()
rate := int64(stats.rate)
if rate > 0 {
fmt.Fprintf(&suf, ", %s/s", format.HumanBytes(rate))
}
fmt.Fprintf(&suf, ")")
elapsed := time.Since(b.started)
if b.percent() < 100 && rate > 0 {
fmt.Fprintf(&suf, " [%s:%s]", elapsed.Round(time.Second), stats.remaining)
} else {
fmt.Fprintf(&suf, " ")
}
mid.WriteString("▕")
// add 3 extra spaces: 2 boundary characters and 1 space at the end
f := termWidth - pre.Len() - suf.Len() - 3
n := int(float64(f) * b.percent() / 100)
if n > 0 {
mid.WriteString(strings.Repeat("█", n))
}
if f-n > 0 {
mid.WriteString(strings.Repeat(" ", f-n))
}
mid.WriteString("▏")
return pre.String() + mid.String() + suf.String()
}
func (b *Bar) Set(value int64) {
if value >= b.maxValue {
value = b.maxValue
}
b.currentValue = value
}
func (b *Bar) percent() float64 {
if b.maxValue > 0 {
return float64(b.currentValue) / float64(b.maxValue) * 100
}
return 0
}
func (b *Bar) Stats() Stats {
if time.Since(b.statted) < time.Second {
return b.stats
}
switch {
case b.statted.IsZero():
b.stats = Stats{
value: b.initialValue,
rate: 0,
remaining: 0,
}
case b.currentValue >= b.maxValue:
b.stats = Stats{
value: b.maxValue,
rate: 0,
remaining: 0,
}
default:
rate := b.currentValue - b.stats.value
var remaining time.Duration
if rate > 0 {
remaining = time.Second * time.Duration((float64(b.maxValue-b.currentValue))/(float64(rate)))
}
b.stats = Stats{
value: b.currentValue,
rate: rate,
remaining: remaining,
}
}
b.statted = time.Now()
return b.stats
}

113
progress/progress.go Normal file
View File

@@ -0,0 +1,113 @@
package progress
import (
"fmt"
"io"
"sync"
"time"
)
type State interface {
String() string
}
type Progress struct {
mu sync.Mutex
w io.Writer
pos int
ticker *time.Ticker
states []State
}
func NewProgress(w io.Writer) *Progress {
p := &Progress{w: w}
go p.start()
return p
}
func (p *Progress) stop() bool {
for _, state := range p.states {
if spinner, ok := state.(*Spinner); ok {
spinner.Stop()
}
}
if p.ticker != nil {
p.ticker.Stop()
p.ticker = nil
p.render()
return true
}
return false
}
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
}
return stopped
}
func (p *Progress) StopAndClear() bool {
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
}
return stopped
}
func (p *Progress) Add(key string, state State) {
p.mu.Lock()
defer p.mu.Unlock()
p.states = append(p.states, state)
}
func (p *Progress) render() error {
p.mu.Lock()
defer p.mu.Unlock()
// clear already rendered progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String())
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}
}
p.pos = len(p.states)
return nil
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprintln(p.w, "\033[?25h")
for range p.ticker.C {
p.render()
}
}

72
progress/spinner.go Normal file
View File

@@ -0,0 +1,72 @@
package progress
import (
"fmt"
"strings"
"time"
)
type Spinner struct {
message string
messageWidth int
value int
ticker *time.Ticker
started time.Time
stopped time.Time
}
func NewSpinner(message string) *Spinner {
s := &Spinner{
message: message,
started: time.Now(),
value: 231,
}
go s.start()
return s
}
func (s *Spinner) String() string {
var sb strings.Builder
if len(s.message) > 0 {
message := strings.TrimSpace(s.message)
if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth]
}
fmt.Fprintf(&sb, "%s", message)
if s.messageWidth-sb.Len() >= 0 {
sb.WriteString(strings.Repeat(" ", s.messageWidth-sb.Len()))
}
sb.WriteString(" ")
}
if s.stopped.IsZero() {
sb.WriteString(fmt.Sprintf("\033[48;5;%dm ", s.value))
sb.WriteString("\033[0m")
}
return sb.String()
}
func (s *Spinner) start() {
s.ticker = time.NewTicker(40 * time.Millisecond)
for range s.ticker.C {
if s.value < 255 {
s.value++
} else {
s.value = 231
}
if !s.stopped.IsZero() {
return
}
}
}
func (s *Spinner) Stop() {
if s.stopped.IsZero() {
s.stopped = time.Now()
}
}

View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2017 Zack
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,121 +0,0 @@
# progressbar
[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar)
[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3)
A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
## Install
```
go get -u github.com/schollz/progressbar/v3
```
## Usage
### Basic usage
```golang
bar := progressbar.Default(100)
for i := 0; i < 100; i++ {
bar.Add(1)
time.Sleep(40 * time.Millisecond)
}
```
which looks like:
![Example of basic bar](examples/basic/basic.gif)
### I/O operations
The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`.
```golang
req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
defer f.Close()
bar := progressbar.DefaultBytes(
resp.ContentLength,
"downloading",
)
io.Copy(io.MultiWriter(f, bar), resp.Body)
```
which looks like:
![Example of download bar](examples/download/download.gif)
### Progress bar with unknown length
A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`.
which looks like:
![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
### Customization
There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
```golang
bar := progressbar.NewOptions(1000,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionShowBytes(true),
progressbar.OptionSetWidth(15),
progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))
for i := 0; i < 1000; i++ {
bar.Add(1)
time.Sleep(5 * time.Millisecond)
}
```
which looks like:
![Example of customized bar](examples/customization/customization.gif)
## Contributing
Pull requests are welcome. Feel free to...
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
## Thanks
Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
## License
MIT

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,80 +0,0 @@
package progressbar
var spinners = map[int][]string{
0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
2: {"▖", "▘", "▝", "▗"},
3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
4: {"◢", "◣", "◤", "◥"},
5: {"◰", "◳", "◲", "◱"},
6: {"◴", "◷", "◶", "◵"},
7: {"◐", "◓", "◑", "◒"},
8: {".", "o", "O", "@", "*"},
9: {"|", "/", "-", "\\"},
10: {"◡◡", "⊙⊙", "◠◠"},
11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"},
13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
17: {"■", "□", "▪", "▫"},
18: {"←", "↑", "→", "↓"},
19: {"╫", "╪"},
20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"},
26: {".", "..", "..."},
27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
28: {".", "o", "O", "°", "O", "o", "."},
29: {"+", "x"},
30: {"v", "<", "^", ">"},
31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"},
32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"},
34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"},
37: {"", ""},
38: {"▌", "▀", "▐▄"},
39: {"🌍", "🌎", "🌏"},
40: {"◜", "◝", "◞", "◟"},
41: {"⬒", "⬔", "⬓", "⬕"},
42: {"⬖", "⬘", "⬗", "⬙"},
43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"},
44: {"♠", "♣", "♥", "♦"},
45: {"➞", "➟", "➠", "➡", "➠", "➟"},
46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "},
47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."},
48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "},
49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"},
52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"},
53: {"✶", "✸", "✹", "✺", "✹", "✷"},
54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"},
56: {"¿", "?"},
57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
59: {". ", ".. ", "...", " ..", " .", " "},
60: {".", "o", "O", "°", "O", "o", "."},
61: {"▓", "▒", "░"},
62: {"▌", "▀", "▐", "▄"},
63: {"⊶", "⊷"},
64: {"▪", "▫"},
65: {"□", "■"},
66: {"▮", "▯"},
67: {"-", "=", "≡"},
68: {"d", "q", "p", "b"},
69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
71: {"☗", "☖"},
72: {"⧇", "⧆"},
73: {"◉", "◎"},
74: {"㊂", "㊀", "㊁"},
75: {"⦾", "⦿"},
}

View File

@@ -10,6 +10,8 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \
-t ollama \
.

View File

@@ -10,6 +10,7 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
-f Dockerfile \
-t ollama/ollama -t ollama/ollama:$VERSION \
.

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
@@ -53,8 +54,8 @@ type blobDownloadPart struct {
const (
numDownloadParts = 64
minDownloadPartSize int64 = 32 * 1000 * 1000
maxDownloadPartSize int64 = 256 * 1000 * 1000
minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte
)
func (p *blobDownloadPart) Name() string {
@@ -147,7 +148,6 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
continue
}
i := i
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
@@ -158,12 +158,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
// return immediately if the context is canceled or the device is out of space
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err)
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
default:
if try > 0 {
log.Printf("%s part %d completed after %d retries", b.Digest[7:19], i, try)
}
return nil
}
}
@@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
}
fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", b.Digest),
Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
Digest: b.Digest,
Total: b.Total,
Completed: b.Completed.Load(),
@@ -304,7 +303,7 @@ type downloadOpts struct {
fn func(api.ProgressResponse)
}
const maxRetries = 3
const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded")
@@ -322,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
return err
default:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", opts.digest),
Status: fmt.Sprintf("pulling %s", opts.digest[7:19]),
Digest: opts.digest,
Total: fi.Size(),
Completed: fi.Size(),

View File

@@ -228,26 +228,6 @@ func GetModel(name string) (*Model, error) {
return model, nil
}
func filenameWithPath(path, f string) (string, error) {
// if filePath starts with ~/, replace it with the user's home directory.
if strings.HasPrefix(f, fmt.Sprintf("~%s", string(os.PathSeparator))) {
parts := strings.Split(f, string(os.PathSeparator))
home, err := os.UserHomeDir()
if err != nil {
return "", fmt.Errorf("failed to open file: %v", err)
}
f = filepath.Join(home, filepath.Join(parts[1:]...))
}
// if filePath is not an absolute path, make it relative to the modelfile path
if !filepath.IsAbs(f) {
f = filepath.Join(filepath.Dir(path), f)
}
return f, nil
}
func realpath(p string) string {
abspath, err := filepath.Abs(p)
if err != nil {
@@ -1146,43 +1126,49 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
var errUnauthorized = fmt.Errorf("unauthorized")
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
lastErr := errMaxRetriesExceeded
for try := 0; try < maxRetries; try++ {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil {
log.Printf("couldn't start upload: %v", err)
return nil, err
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil {
if !errors.Is(err, context.Canceled) {
log.Printf("request failed: %v", err)
}
switch {
case resp.StatusCode == http.StatusUnauthorized:
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir)
return nil, err
}
switch {
case resp.StatusCode == http.StatusUnauthorized:
// Handle authentication error with one retry
auth := resp.Header.Get("www-authenticate")
authRedir := ParseAuthRedirectString(auth)
token, err := getAuthToken(ctx, authRedir)
if err != nil {
return nil, err
}
regOpts.Token = token
if body != nil {
_, err = body.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
regOpts.Token = token
if body != nil {
body.Seek(0, io.SeekStart)
}
lastErr = errUnauthorized
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, body)
default:
return resp, nil
}
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if resp.StatusCode == http.StatusUnauthorized {
return nil, errUnauthorized
}
return resp, err
case resp.StatusCode == http.StatusNotFound:
return nil, os.ErrNotExist
case resp.StatusCode >= http.StatusBadRequest:
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
}
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
}
return nil, lastErr
return resp, nil
}
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {

View File

@@ -666,8 +666,14 @@ func HeadBlobHandler(c *gin.Context) {
}
func CreateBlobHandler(c *gin.Context) {
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
hash := sha256.New()
temp, err := os.CreateTemp("", c.Param("digest"))
temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
@@ -690,12 +696,6 @@ func CreateBlobHandler(c *gin.Context) {
return
}
targetPath, err := GetBlobsPath(c.Param("digest"))
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := os.Rename(temp.Name(), targetPath); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
@@ -794,7 +794,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
if runtime.GOOS == "linux" {
// check compatibility to log warnings
if _, err := llm.CheckVRAM(); err != nil {
log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
log.Printf(err.Error())
}
}

View File

@@ -5,9 +5,9 @@ import (
"crypto/md5"
"errors"
"fmt"
"hash"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
@@ -35,6 +35,8 @@ type blobUpload struct {
context.CancelFunc
file *os.File
done bool
err error
references atomic.Int32
@@ -42,8 +44,8 @@ type blobUpload struct {
const (
numUploadParts = 64
minUploadPartSize int64 = 95 * 1000 * 1000
maxUploadPartSize int64 = 1000 * 1000 * 1000
minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * format.MegaByte
)
func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
@@ -128,12 +130,12 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
return
}
f, err := os.Open(p)
b.file, err = os.Open(p)
if err != nil {
b.err = err
return
}
defer f.Close()
defer b.file.Close()
g, inner := errgroup.WithContext(ctx)
g.SetLimit(numUploadParts)
@@ -145,7 +147,6 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
part.ReadSeeker = io.NewSectionReader(f, part.Offset, part.Size)
err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
switch {
case errors.Is(err, context.Canceled):
@@ -153,7 +154,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
case errors.Is(err, errMaxRetriesExceeded):
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
}
@@ -173,8 +177,16 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
requestURL := <-b.nextURL
var sb strings.Builder
// calculate md5 checksum and add it to the commit request
for _, part := range b.Parts {
sb.Write(part.Sum(nil))
hash := md5.New()
if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
b.err = err
return
}
sb.Write(hash.Sum(nil))
}
md5sum := md5.Sum([]byte(sb.String()))
@@ -188,29 +200,39 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", "0")
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if err != nil {
b.err = err
for try := 0; try < maxRetries; try++ {
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if err != nil {
b.err = err
if errors.Is(err, context.Canceled) {
return
}
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
time.Sleep(sleep)
continue
}
defer resp.Body.Close()
b.err = nil
b.done = true
return
}
defer resp.Body.Close()
b.done = true
}
func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
part.Reset()
headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
headers.Set("X-Redirect-Uploads", "1")
if method == http.MethodPatch {
headers.Set("X-Redirect-Uploads", "1")
headers.Set("Content-Range", fmt.Sprintf("%d-%d", part.Offset, part.Offset+part.Size-1))
}
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(part.ReadSeeker, io.MultiWriter(part, part.Hash)), opts)
sr := io.NewSectionReader(b.file, part.Offset, part.Size)
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
if err != nil {
return err
}
@@ -235,6 +257,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
return err
}
// retry uploading to the redirect URL
for try := 0; try < maxRetries; try++ {
err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
switch {
@@ -243,7 +266,10 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
case errors.Is(err, errMaxRetriesExceeded):
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], part.N, try, err)
part.Reset()
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
}
@@ -301,7 +327,7 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
}
fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", b.Digest),
Status: fmt.Sprintf("pushing %s", b.Digest[7:19]),
Digest: b.Digest,
Total: b.Total,
Completed: b.Completed.Load(),
@@ -315,14 +341,10 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
type blobUploadPart struct {
// N is the part number
N int
Offset int64
Size int64
hash.Hash
N int
Offset int64
Size int64
written int64
io.ReadSeeker
*blobUpload
}
@@ -334,10 +356,8 @@ func (p *blobUploadPart) Write(b []byte) (n int, err error) {
}
func (p *blobUploadPart) Reset() {
p.Seek(0, io.SeekStart)
p.Completed.Add(-int64(p.written))
p.written = 0
p.Hash = md5.New()
}
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryOptions, fn func(api.ProgressResponse)) error {
@@ -352,7 +372,7 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *RegistryO
default:
defer resp.Body.Close()
fn(api.ProgressResponse{
Status: fmt.Sprintf("uploading %s", layer.Digest),
Status: fmt.Sprintf("pushing %s", layer.Digest[7:19]),
Digest: layer.Digest,
Total: layer.Size,
Completed: layer.Size,