Compare commits

...

68 Commits
v0.0.20 ... api

Author SHA1 Message Date
Jeffrey Morgan
949fc4eafa wip /api/chat 2023-10-01 14:54:17 -07:00
Michael Yang
0a4f21c0a7 fix docker build (#659) 2023-09-30 13:34:01 -07:00
Jeffrey Morgan
9abb66254a docker: fix volume permission errors 2023-09-30 12:32:15 -07:00
Jay Nakrani
1d0ebe67e8 Document response stream chunk delimiter. (#632)
Document response stream chunk delimiter.
2023-09-29 21:45:52 -07:00
Bruce MacDonald
a1b2d95f96 remove unused push/pull params (#650) 2023-09-29 17:27:19 -04:00
Michael Yang
c0b1bf7537 Merge pull request #606 from jmorganca/mxyng/install.sh-2
ordered list of install locations
2023-09-29 11:30:46 -07:00
Michael Yang
cdfeb165ca Merge pull request #608 from jmorganca/mxyng/build
update build scripts
2023-09-29 11:30:25 -07:00
Michael Yang
92d454ec5f update build_darwin.sh 2023-09-29 11:29:23 -07:00
Michael Yang
9333b0cc82 Merge pull request #612 from jmorganca/mxyng/prune-empty-directories
prune empty directories
2023-09-29 11:23:39 -07:00
Bruce MacDonald
9771b1ec51 windows runner fixes (#637) 2023-09-29 11:47:55 -04:00
Patrick Devine
76db4a49cf allow the user to cancel generating with ctrl-C (#641) 2023-09-28 17:13:01 -07:00
Luc Stepniewski
4aa0976a2e Added missing return preventing SIGSEGV because of missing resp (#621)
Co-authored-by: Luc Stepniewski <luc@eclipse-fr.com>
2023-09-28 14:25:22 -07:00
Patrick Devine
92c20fdae6 fix error messages for unknown commands in the repl (#611) 2023-09-28 14:19:45 -07:00
Michael Yang
c951da7096 Merge pull request #634 from jmorganca/mxyng/int64
use int64 consistently
2023-09-28 14:17:47 -07:00
Bruce MacDonald
24d82a23a2 do not download updates multiple times (#633) 2023-09-28 15:29:17 -04:00
Michael Yang
f40b3de758 use int64 consistently 2023-09-28 11:07:24 -07:00
Michael
5f4008c296 Update README.md
adding in instruction to run mistral
2023-09-28 09:06:03 -07:00
Aaron Coffey
6ae33d8141 Update modelfile.md to reflect the usage of num_gpu. (#629) 2023-09-28 10:21:21 -04:00
Jeffrey Morgan
c5664c1fef Update faq.md 2023-09-27 13:49:43 -07:00
Bruce MacDonald
958a5a8184 revert fedora cuda version check 2023-09-27 15:12:29 -04:00
Michael Yang
8608eb4760 prune empty directories 2023-09-27 10:58:09 -07:00
Bruce MacDonald
a2b210130f fedora install fixes (#609) 2023-09-27 11:43:47 -04:00
Bruce MacDonald
ed20837f9a Update modelfile.md 2023-09-27 10:38:10 -04:00
James Braza
1db2a61dd0 Added num_predict to the options table (#614) 2023-09-27 10:26:08 -04:00
Jeffrey Morgan
2ded8ab206 use 11.8.0 nvidia dockerfile base image for now 2023-09-26 21:48:41 -07:00
Michael Yang
e6b3648bbf Merge pull request #616 from jmorganca/mxyng/fix-model-name 2023-09-26 20:54:18 -07:00
Michael Yang
0625e805f0 fix model name not matching 2023-09-26 19:50:04 -07:00
Michael Yang
c38ec5befb Merge pull request #598 from jmorganca/mxyng/help-exit
add painter message for exit
2023-09-26 15:17:40 -07:00
Michael Yang
c577721a43 Merge pull request #605 from jmorganca/mxyng/install.sh
do not unload nouveau driver
2023-09-26 09:53:05 -07:00
Michael Yang
29c056ea39 ordered list of install locations 2023-09-26 09:38:11 -07:00
Michael Yang
9fc3bba9cf do no unload nouveau driver 2023-09-26 09:36:54 -07:00
Michael Chiang
7774ed4ae6 Update README.md for linux + cleanup (#601)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-09-25 23:44:53 -07:00
Michael Yang
11f920f209 Merge pull request #599 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 18:24:13 -07:00
Michael Yang
6e6b655956 update install.sh 2023-09-25 18:09:44 -07:00
Michael Yang
110ae89a6c Merge pull request #596 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 17:59:13 -07:00
Michael Yang
5e388f931e check cuda installed before installing 2023-09-25 17:56:43 -07:00
Michael Yang
d5ad41dd7b fix path for wsl user 2023-09-25 17:56:25 -07:00
Michael Yang
d294a11bc9 start service on exit instead of immediately 2023-09-25 17:54:02 -07:00
Michael Yang
93d887e4bc add painter message for exit 2023-09-25 16:30:22 -07:00
Jeffrey Morgan
5306b0269d Update linux.md 2023-09-25 16:10:32 -07:00
Michael Yang
7de0c8345d Merge pull request #595 from jmorganca/mxyng/install.sh
ignore systemctl is-system-running exit code
2023-09-25 15:49:47 -07:00
Michael Yang
1b9dcab3ab ignore systemctl is-system-running exit code 2023-09-25 15:47:45 -07:00
Bruce MacDonald
86279f4ae3 unbound max num gpu layers (#591)
---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-25 18:36:46 -04:00
Michael Yang
b934bf23e6 exit on unknown distro (#594) 2023-09-25 15:30:58 -07:00
Michael Yang
2b8ef455ad Merge pull request #593 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 14:09:40 -07:00
Michael Yang
0c5f47177c update install.sh 2023-09-25 14:01:44 -07:00
Michael Yang
1210db2924 Merge pull request #592 from jmorganca/mxyng/install.sh
fix dkms on debian
2023-09-25 12:59:01 -07:00
Michael Yang
d0854bf1e6 fix dkms on debian 2023-09-25 12:57:25 -07:00
Michael Yang
8396463255 Merge pull request #590 from jmorganca/mxyng/install.sh
fix dkms install
2023-09-25 12:17:31 -07:00
Michael Yang
a027bbf4d7 fix dkms install 2023-09-25 12:16:41 -07:00
Michael Yang
ed94a3dd02 Merge pull request #589 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 11:08:25 -07:00
Michael Yang
f14f62ab3b update install.sh 2023-09-25 11:05:38 -07:00
Jeffrey Morgan
0fb5268496 Update linux.md 2023-09-25 10:06:23 -07:00
Bruce MacDonald
c65edb1506 fix linux installer warning logs (#588) 2023-09-25 11:22:56 -04:00
Twan L
1605af32ec Added a new community project (#574) 2023-09-25 10:40:59 -04:00
Jeffrey Morgan
ee3032ad89 improvements to docs/linux.md 2023-09-24 21:50:07 -07:00
Jeffrey Morgan
5b7a27281d improvements to docs/linux.md 2023-09-24 21:38:23 -07:00
Jeffrey Morgan
d2a784e33e add docs/linux.md 2023-09-24 21:34:44 -07:00
Jeffrey Morgan
413a2e4f91 set DEBIAN_FRONTEND=noninteractive correctly 2023-09-24 20:35:42 -07:00
Patrick Devine
b5614f3ebc fix end-of-line issue with the new prompt (#582) 2023-09-23 17:20:30 -07:00
Jeffrey Morgan
8b2ba9cab8 minor improvements to install.sh 2023-09-23 11:20:39 -04:00
Jeffrey Morgan
e29662ab5c fix minor install script issues on debian 2023-09-23 10:25:47 -04:00
Bruce MacDonald
cbc40aa996 debian installer support (#579)
* debian installer support

- normalize os name to lowercase
- check needed commands are available
- dont check sudo when root user
- share common install commands
- support debian cuda install
- skip aarm cuda install
- system user shared home dir

* refactor and add other platforms (#580)

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-23 09:46:47 -04:00
Jeffrey Morgan
5cb82540c9 install.sh: update install url 2023-09-23 09:35:14 -04:00
Jeffrey Morgan
d7849a1dc9 add .env to .dockerignore 2023-09-23 00:53:48 -04:00
Jeffrey Morgan
01c44d687e add multi line strings to final prompt 2023-09-23 00:27:24 -04:00
Jeffrey Morgan
9b12a511ca check other request fields before load short circuit in /api/generate 2023-09-22 23:50:55 -04:00
Jeffrey Morgan
e20362e0d5 fix multi line input in ollama run 2023-09-22 23:49:35 -04:00
26 changed files with 807 additions and 413 deletions

View File

@@ -5,3 +5,4 @@ dist
scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env

View File

@@ -1,9 +1,8 @@
ARG CUDA_VERSION=12.2.0
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG TARGETARCH
ARG VERSION=0.0.0
ARG GOFLAGS="'-ldflags=-w -s'"
WORKDIR /go/src/github.com/jmorganca/ollama
RUN apt-get update && apt-get install -y git build-essential cmake
@@ -13,19 +12,12 @@ RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
COPY . .
ENV GOARCH=$TARGETARCH
RUN /usr/local/go/bin/go generate ./... \
&& /usr/local/go/bin/go build -ldflags "-linkmode=external -extldflags='-static' -X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
&& /usr/local/go/bin/go build .
FROM ubuntu:22.04
ENV OLLAMA_HOST 0.0.0.0
RUN apt-get update && apt-get install -y ca-certificates
ARG USER=ollama
ARG GROUP=ollama
RUN groupadd $GROUP && useradd -m -g $GROUP $USER
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
USER $USER:$GROUP
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

View File

@@ -1,4 +1,3 @@
ARG VERSION=0.0.0
# centos7 amd64 dependencies
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-devel-centos7 AS base-amd64
@@ -23,7 +22,11 @@ RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.1.tar.gz
# build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
ENV GOOS=linux
ENV GOARCH=$TARGETARCH
ARG VERSION=0.0.0
ARG GOFLAGS="'-ldflags -w -s'"
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build -ldflags "-X=github.com/jmorganca/ollama/version.Version=$VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
/usr/local/go/bin/go build .

196
README.md
View File

@@ -9,19 +9,27 @@
[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
Run, create, and share large language models (LLMs).
Get up and running with large language models locally.
> Note: Ollama is in early preview. Please report any issues you find.
### macOS
## Download
[Download](https://ollama.ai/download/Ollama-darwin.zip)
- [Download](https://ollama.ai/download) for macOS
- Download for Windows and Linux (coming soon)
- Build [from source](#building)
### Linux & WSL2
```
curl https://ollama.ai/install.sh | sh
```
[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)
### Windows
coming soon
## Quickstart
To run and chat with [Llama 2](https://ai.meta.com/llama), the new model by Meta:
To run and chat with [Llama 2](https://ollama.ai/library/llama2):
```
ollama run llama2
@@ -29,87 +37,50 @@ ollama run llama2
## Model library
Ollama supports a list of open-source models available on [ollama.ai/library](https://ollama.ai/library 'ollama model library')
Ollama supports a list of open-source models available on [ollama.ai/library](https://ollama.ai/library "ollama model library")
Here are some example open-source models that can be downloaded:
| Model | Parameters | Size | Download |
| ------------------------ | ---------- | ----- | ------------------------------- |
| Llama2 | 7B | 3.8GB | `ollama pull llama2` |
| Llama2 13B | 13B | 7.3GB | `ollama pull llama2:13b` |
| Llama2 70B | 70B | 39GB | `ollama pull llama2:70b` |
| Llama2 Uncensored | 7B | 3.8GB | `ollama pull llama2-uncensored` |
| Code Llama | 7B | 3.8GB | `ollama pull codellama` |
| Orca Mini | 3B | 1.9GB | `ollama pull orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama pull vicuna` |
| Nous-Hermes | 7B | 3.8GB | `ollama pull nous-hermes` |
| Nous-Hermes 13B | 13B | 7.3GB | `ollama pull nous-hermes:13b` |
| Wizard Vicuna Uncensored | 13B | 7.3GB | `ollama pull wizard-vicuna` |
| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Llama 2 | 7B | 3.8GB | `ollama run llama2` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
## Examples
## Customize your own model
### Pull a public model
### Import from GGUF or GGML
```
ollama pull llama2
```
Ollama supports importing GGUF and GGML file formats in the Modelfile. This means if you have a model that is not in the Ollama library, you can create it, iterate on it, and upload it to the Ollama library to share with others when you are ready.
> This command can also be used to update a local model. Only updated changes will be pulled.
1. Create a file named Modelfile, and add a `FROM` instruction with the local filepath to the model you want to import.
### Run a model interactively
```
FROM ./vicuna-33b.Q4_0.gguf
```
```
ollama run llama2
>>> hi
Hello! How can I help you today?
```
2. Create the model in Ollama
For multiline input, you can wrap text with `"""`:
```
ollama create name -f path_to_modelfile
```
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
3. Run the model
### Run a model non-interactively
```
ollama run name
```
```
$ ollama run llama2 'tell me a joke'
Sure! Here's a quick one:
Why did the scarecrow win an award? Because he was outstanding in his field!
```
### Customize a prompt
```
$ cat <<EOF >prompts.txt
tell me a joke about llamas
tell me another one
EOF
$ ollama run llama2 <prompts.txt
>>> tell me a joke about llamas
Why did the llama refuse to play hide-and-seek?
nobody likes to be hided!
>>> tell me another one
Sure, here's another one:
Why did the llama go to the bar?
To have a hay-often good time!
```
### Run a model on contents of a text file
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### Customize a model
Pull a base model:
Models from the Ollama library can be customized with a prompt. The example
```
ollama pull llama2
@@ -138,30 +109,61 @@ ollama run mario
Hello! It's your friend Mario.
```
For more examples, see the [examples](./examples) directory. For more information on creating a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
For more examples, see the [examples](./examples) directory. For more information on working with a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
### Listing local models
## CLI Reference
### Create a model
`ollama create` is used to create a model from a Modelfile.
### Pull a model
```
ollama list
ollama pull llama2
```
### Removing local models
> This command can also be used to update a local model. Only the diff will be pulled.
### Remove a model
```
ollama rm llama2
```
## Model packages
### Copy a model
### Overview
```
ollama cp llama2 my-llama2
```
Ollama bundles model weights, configurations, and data into a single package, defined by a [Modelfile](./docs/modelfile.md).
### Multiline input
<picture>
<source media="(prefers-color-scheme: dark)" height="480" srcset="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
<img alt="logo" height="480" src="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
</picture>
For multiline input, you can wrap text with `"""`:
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
### Pass in prompt as arguments
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### List models on your computer
```
ollama list
```
### Start Ollama
`ollama serve` is used when you want to start ollama without running the desktop application.
## Building
@@ -204,18 +206,18 @@ curl -X POST http://localhost:11434/api/generate -d '{
}'
```
## Community Projects using Ollama
## Community Integrations
| Project | Description |
| -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| [LangChain][1] and [LangChain.js][2] | Also, there is a question-answering [example][3]. |
| [Continue](https://github.com/continuedev/continue) | Embeds Ollama inside Visual Studio Code. The extension lets you highlight code to add to the prompt, ask questions in the sidebar, and generate code inline. |
| [LiteLLM](https://github.com/BerriAI/litellm) | Lightweight Python package to simplify LLM API calls. |
| [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) | Interact with Ollama as a chatbot on Discord. |
| [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) | Raycast extension to use Ollama for local llama inference on Raycast. |
| [Simple HTML UI](https://github.com/rtcfirefly/ollama-ui) | Also, there is a Chrome extension. |
| [Emacs client](https://github.com/zweifisch/ollama) | |
[1]: https://python.langchain.com/docs/integrations/llms/ollama
[2]: https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama
[3]: https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
- [Continue](https://github.com/continuedev/continue)
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)
- [Emacs client](https://github.com/zweifisch/ollama)

View File

@@ -31,6 +31,22 @@ func (e StatusError) Error() string {
}
}
// /api/chat
// Message is a single chat message exchanged with the model. It is used both
// as an element of ChatRequest.Messages and as the payload of ChatResponse.
type Message struct {
// Role is serialized as "role".
// NOTE(review): presumably identifies the speaker (e.g. user vs. model);
// the accepted values are not defined here — confirm against the handler.
Role string `json:"role"`
// Content is the text of the message, serialized as "content".
Content string `json:"content"`
}
// ChatRequest names a model and carries a list of messages for it.
// NOTE(review): presumably the JSON request body of the /api/chat endpoint —
// confirm against the server route.
type ChatRequest struct {
// Model is the name of the model, serialized as "model".
Model string `json:"model"`
// Messages is the list of messages sent with the request, serialized as
// "messages".
Messages []Message `json:"messages"`
}
// ChatResponse pairs a timestamp with a single Message.
// NOTE(review): presumably the JSON response of the /api/chat endpoint —
// confirm against the server route.
type ChatResponse struct {
// CreatedAt is a timestamp serialized as "created_at".
CreatedAt time.Time `json:"created_at"`
// Message is the message carried by this response, serialized as "message".
Message Message `json:"message"`
}
type GenerateRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -81,22 +97,18 @@ type CopyRequest struct {
type PullRequest struct {
Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
}
type ProgressResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int `json:"total,omitempty"`
Completed int `json:"completed,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}
type PushRequest struct {
Name string `json:"name"`
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
}
type ListResponse struct {
@@ -106,7 +118,7 @@ type ListResponse struct {
type ModelResponse struct {
Name string `json:"name"`
ModifiedAt time.Time `json:"modified_at"`
Size int `json:"size"`
Size int64 `json:"size"`
Digest string `json:"digest"`
}

View File

@@ -5,7 +5,7 @@ import winston from 'winston'
import 'winston-daily-rotate-file'
import * as path from 'path'
import { analytics, id } from './telemetry'
import { v4 as uuidv4 } from 'uuid'
import { installed } from './install'
require('@electron/remote/main').initialize()
@@ -164,11 +164,11 @@ app.on('before-quit', () => {
function init() {
if (app.isPackaged) {
heartbeat()
autoUpdater.checkForUpdates()
setInterval(() => {
heartbeat()
autoUpdater.checkForUpdates()
if (!updateAvailable) {
autoUpdater.checkForUpdates()
}
}, 60 * 60 * 1000)
}
@@ -234,28 +234,26 @@ app.on('window-all-closed', () => {
}
})
// In this file you can include the rest of your app's specific main process
// code. You can also put them in separate files and import them here.
let aid = ''
try {
aid = id()
} catch (e) {}
function id(): string {
const id = store.get('id') as string
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${process.arch}&version=${app.getVersion()}&id=${aid}`,
})
if (id) {
return id
}
async function heartbeat() {
analytics.track({
anonymousId: aid,
event: 'heartbeat',
properties: {
version: app.getVersion(),
},
})
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`,
})
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)
console.error(`update check failed - ${e.message}`)
})

View File

@@ -1,19 +0,0 @@
import { Analytics } from '@segment/analytics-node'
import { v4 as uuidv4 } from 'uuid'
import Store from 'electron-store'
const store = new Store()
export const analytics = new Analytics({ writeKey: process.env.TELEMETRY_WRITE_KEY || '<empty>' })
export function id(): string {
const id = store.get('id') as string
if (id) {
return id
}
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}

View File

@@ -13,9 +13,11 @@ import (
"net"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/dustin/go-humanize"
@@ -32,14 +34,23 @@ import (
"github.com/jmorganca/ollama/version"
)
type Painter struct{}
type Painter struct {
IsMultiLine bool
}
func (p Painter) Paint(line []rune, l int) []rune {
func (p Painter) Paint(line []rune, _ int) []rune {
termType := os.Getenv("TERM")
if termType == "xterm-256color" && len(line) == 0 {
prompt := "Send a message (/? for help)"
var prompt string
if p.IsMultiLine {
prompt = "Use \"\"\" to end multi-line input"
} else {
prompt = "Send a message (/? for help)"
}
return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt)))
}
// add a space and a backspace to prevent the cursor from walking up the screen
line = append(line, []rune(" \b")...)
return line
}
@@ -69,18 +80,18 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
currentDigest = resp.Digest
switch {
case strings.Contains(resp.Status, "embeddings"):
bar = progressbar.Default(int64(resp.Total), resp.Status)
bar.Set(resp.Completed)
bar = progressbar.Default(resp.Total, resp.Status)
bar.Set64(resp.Completed)
default:
// pulling
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
resp.Status,
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
}
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
@@ -118,13 +129,9 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}
modelName, modelTag, ok := strings.Cut(args[0], ":")
if !ok {
modelTag = "latest"
}
canonicalModelPath := server.ParseModelPath(args[0])
for _, model := range models.Models {
if model.Name == strings.Join([]string{modelName, modelTag}, ":") {
if model.Name == canonicalModelPath.GetShortTagname() {
return RunGenerate(cmd, args)
}
}
@@ -155,13 +162,13 @@ func PushHandler(cmd *cobra.Command, args []string) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -344,13 +351,13 @@ func pull(model string, insecure bool) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -421,6 +428,19 @@ func generate(cmd *cobra.Command, model, prompt string) error {
wrapTerm = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
var abort bool
go func() {
<-sigChan
cancel()
abort = true
}()
var currentLineLength int
var wordBuffer string
@@ -460,7 +480,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
return nil
}
if err := client.Generate(context.Background(), &request, fn); err != nil {
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "failed to load model") {
// tell the user to check the server log, if it exists locally
home, nestedErr := os.UserHomeDir()
@@ -472,6 +492,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
if _, nestedErr := os.Stat(logPath); nestedErr == nil {
err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
}
} else if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err
}
@@ -481,6 +504,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
}
if !latest.Done {
if abort {
return nil
}
return errors.New("unexpected end of response")
}
@@ -538,8 +564,10 @@ func generateInteractive(cmd *cobra.Command, model string) error {
fmt.Fprintln(os.Stderr, completer.Tree(" "))
}
var painter Painter
config := readline.Config{
Painter: Painter{},
Painter: &painter,
Prompt: ">>> ",
HistoryFile: filepath.Join(home, ".ollama", "history"),
AutoComplete: completer,
@@ -561,7 +589,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
return nil
fmt.Println("Use Ctrl-D or /bye to exit.")
}
continue
@@ -575,17 +603,18 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case isMultiLine:
if strings.HasSuffix(line, `"""`) {
isMultiLine = false
painter.IsMultiLine = isMultiLine
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
scanner.SetPrompt(">>> ")
continue
} else {
multiLineBuffer += line + " "
continue
}
case strings.HasPrefix(line, `"""`):
isMultiLine = true
painter.IsMultiLine = isMultiLine
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
scanner.SetPrompt("... ")
continue
@@ -627,6 +656,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
} else {
usage()
}
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usage()
@@ -637,6 +668,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
resp, err := server.GetModelInfo(model)
if err != nil {
fmt.Println("error: couldn't get model")
return err
}
switch args[1] {
@@ -651,7 +683,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "template":
fmt.Println(resp.Template)
default:
fmt.Println("error: unknown command")
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usage()
@@ -713,6 +745,15 @@ func RunServer(cmd *cobra.Command, _ []string) error {
if err := server.PruneLayers(); err != nil {
return err
}
manifestsPath, err := server.GetManifestPath()
if err != nil {
return err
}
if err := server.PruneDirectory(manifestsPath); err != nil {
return err
}
}
return server.Serve(ln, origins)

View File

@@ -23,6 +23,10 @@ Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` a
All durations are returned in nanoseconds.
### Streaming responses
Certain endpoints stream responses as JSON objects delimited by the newline (`\n`) character.
## Generate a completion
```shell

View File

@@ -14,4 +14,6 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
## Where are models stored?
Raw model data is stored under `~/.ollama/models`.
* macOS: Raw model data is stored under `~/.ollama/models`.
* Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`.

83
docs/linux.md Normal file
View File

@@ -0,0 +1,83 @@
# Installing Ollama on Linux
> Note: A one-line installer for Ollama is available by running:
>
> ```
> curl https://ollama.ai/install.sh | sh
> ```
## Download the `ollama` binary
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
## Start Ollama
Start Ollama by running `ollama serve`:
```
ollama serve
```
Once Ollama is running, run a model in another terminal session:
```
ollama run llama2
```
## Install CUDA drivers (optional for Nvidia GPUs)
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```
nvidia-smi
```
## Adding Ollama as a startup service (optional)
Create a user for Ollama:
```
sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
```
Create a service file in `/etc/systemd/system/ollama.service`:
```ini
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/usr/share/ollama"
[Install]
WantedBy=default.target
```
Then reload systemd and enable the service so that it starts at boot:
```
sudo systemctl daemon-reload
sudo systemctl enable ollama
```
### Viewing logs
To view logs of Ollama running as a startup service, run:
```
journalctl -u ollama
```

View File

@@ -94,6 +94,7 @@ This bin file location should be specified as an absolute path or relative to th
### EMBED
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
FROM <model name>:<tag>
EMBED <file path>.txt
@@ -118,13 +119,14 @@ PARAMETER <parameter> <parametervalue>
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 |
| num_gpu | The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 1 |
| num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 |
| num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| stop | Sets the stop sequences to use. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |

View File

@@ -77,6 +77,7 @@ type model interface {
ModelFamily() string
ModelType() string
FileType() string
NumLayers() int64
}
type container interface {

View File

@@ -195,6 +195,16 @@ func (llm *ggufModel) Decode(r io.Reader) error {
return nil
}
// NumLayers returns the block count stored in the model's key/value metadata
// under "<model family>.block_count", widened to int64.
//
// It returns 0 when the key is absent or when the stored value is not a
// uint32.
func (llm *ggufModel) NumLayers() int64 {
	value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
	if !exists {
		return 0
	}

	// Use the comma-ok form: a bare type assertion would panic if the
	// metadata entry carries an unexpected type.
	v, ok := value.(uint32)
	if !ok {
		return 0
	}

	return int64(v)
}
func (ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8
binary.Read(r, binary.LittleEndian, &u8)

View File

@@ -64,27 +64,29 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
for _, r := range runners {
// find all the files in the runner's bin directory
files, err := fs.Glob(llamaCppEmbed, filepath.Join(filepath.Dir(r), "*"))
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*"))
if err != nil {
// this is expected, ollama may be compiled without all runners packed in
log.Printf("%s runner not found: %v", r, err)
continue
}
runnerAvailable = true
for _, f := range files {
runnerAvailable = true
srcFile, err := llamaCppEmbed.Open(f)
if err != nil {
log.Fatalf("read llama runner %s: %v", f, err)
}
defer srcFile.Close()
// create the directory in case it does not exist
// create the directory in case it does not exist, filepath.Dir() converts the file path to the OS's format
destPath := filepath.Join(workDir, filepath.Dir(f))
if err := os.MkdirAll(destPath, 0o755); err != nil {
log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
}
// create the path to the destination file, filepath.Base() converts the file path to the OS's format
destFile := filepath.Join(destPath, filepath.Base(f))
_, err = os.Stat(destFile)
@@ -111,7 +113,8 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
// return the runners to try in priority order
localRunnersByPriority := []ModelRunner{}
for _, r := range runners {
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(workDir, r)})
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))})
}
return localRunnersByPriority
@@ -152,6 +155,10 @@ func (llm *llamaModel) FileType() string {
return fileType(llm.hyperparameters.FileType)
}
// NumLayers reports the NumLayer hyperparameter of the model as an int64.
func (llm *llamaModel) NumLayers() int64 {
	layers := llm.hyperparameters.NumLayer
	return int64(layers)
}
type llamaHyperparameters struct {
// NumVocab is the size of the model's vocabulary.
NumVocab uint32
@@ -183,7 +190,7 @@ type llama struct {
var errNoGPU = errors.New("nvidia-smi command failed")
// CheckVRAM returns the available VRAM in MiB on Linux machines with NVIDIA GPUs
func CheckVRAM() (int, error) {
func CheckVRAM() (int64, error) {
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits")
var stdout bytes.Buffer
cmd.Stdout = &stdout
@@ -192,11 +199,11 @@ func CheckVRAM() (int, error) {
return 0, errNoGPU
}
var total int
var total int64
scanner := bufio.NewScanner(&stdout)
for scanner.Scan() {
line := scanner.Text()
vram, err := strconv.Atoi(line)
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
}
@@ -207,13 +214,13 @@ func CheckVRAM() (int, error) {
return total, nil
}
func NumGPU(opts api.Options) int {
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
n := 1 // default to enable metal on macOS
if runtime.GOOS == "linux" {
vram, err := CheckVRAM()
vramMib, err := CheckVRAM()
if err != nil {
if err.Error() != "nvidia-smi command failed" {
log.Print(err.Error())
@@ -221,33 +228,25 @@ func NumGPU(opts api.Options) int {
// nvidia driver not installed or no nvidia GPU found
return 0
}
// TODO: this is a very rough heuristic, better would be to calculate this based on number of layers and context size
switch {
case vram < 500:
log.Printf("WARNING: Low VRAM detected, disabling GPU")
n = 0
case vram < 1000:
n = 4
case vram < 2000:
n = 8
case vram < 4000:
n = 12
case vram < 8000:
n = 16
case vram < 12000:
n = 24
case vram < 16000:
n = 32
default:
n = 48
}
log.Printf("%d MB VRAM available, loading %d GPU layers", vram, n)
totalVramBytes := int64(vramMib) * 1024 * 1024 // 1 MiB = 1024^2 bytes
// Calculate bytes per layer
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
bytesPerLayer := fileSizeBytes / numLayer
// set n to the max number of layers we can fit in VRAM
return int(totalVramBytes / bytesPerLayer)
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, n)
}
return n
// default to enable metal on macOS
return 1
}
func newLlama(model string, adapters []string, runners []ModelRunner, opts api.Options) (*llama, error) {
if _, err := os.Stat(model); err != nil {
func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
fileInfo, err := os.Stat(model)
if err != nil {
return nil, err
}
@@ -261,7 +260,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, opts api.O
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(opts)),
"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)),
"--embedding",
}

View File

@@ -91,9 +91,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
switch ggml.Name() {
case "gguf":
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
case "ggml", "ggmf", "ggjt", "ggla":
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
default:
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
}

21
scripts/build.sh Normal file
View File

@@ -0,0 +1,21 @@
#!/bin/sh

# Runs every platform build script in sequence for a single release version.
#
# usage: build.sh VERSION
#
# VERSION is exported so the per-platform scripts invoked below can read it.

set -eu

# usage prints the expected invocation and exits with a non-zero status.
usage() {
    echo "usage: $(basename "$0") VERSION"
    exit 1
}

# exactly one argument (the version) is required
[ "$#" -eq 1 ] || usage

export VERSION="$1"

# Resolve the directory holding this script once; quoting keeps paths that
# contain spaces intact.
SCRIPT_DIR="$(dirname "$0")"

# build universal MacOS binary
sh "$SCRIPT_DIR/build_darwin.sh"

# build arm64 and amd64 Linux binaries
sh "$SCRIPT_DIR/build_linux.sh"

# build arm64 and amd64 Docker images
sh "$SCRIPT_DIR/build_docker.sh"

View File

@@ -1,29 +1,30 @@
#!/bin/bash
#!/bin/sh
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
mkdir -p dist
GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION"
GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
done
# build universal binary
GOARCH=arm64 go generate ./...
GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
rm -rf llm/llama.cpp/*/build/*/bin
GOARCH=amd64 go generate ./...
GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
lipo -create -output dist/ollama dist/ollama-darwin-*
rm -f dist/ollama-darwin-*
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
chmod +x dist/ollama
# build and sign the mac app
npm install --prefix app
npm run --prefix app make:sign
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-${VERSION:-0.0.0}.zip dist/Ollama-darwin.zip
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
ditto -c -k --keepParent dist/ollama dist/temp.zip
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
mv dist/ollama dist/ollama-darwin
rm dist/temp.zip
rm -f dist/temp.zip

15
scripts/build_docker.sh Normal file
View File

@@ -0,0 +1,15 @@
#!/bin/sh
# Builds the ollama Docker image for linux/arm64 and linux/amd64 from the
# Dockerfile in the current directory and tags it "ollama".
set -eu
# VERSION falls back to 0.0.0 when the caller has not set it.
export VERSION=${VERSION:-0.0.0}
# GOFLAGS carries the linker flags: -w -s plus -X settings that embed VERSION
# into version.Version and set server.mode to "release".
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
# VERSION and GOFLAGS are handed to the build as build args by name only.
# NOTE(review): presumably docker picks their values up from the environment
# exported above; confirm against the docker build documentation.
docker buildx build \
--load \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
-f Dockerfile \
-t ollama \
.

View File

@@ -1,12 +1,15 @@
#!/bin/bash
#!/bin/sh
set -e
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
mkdir -p dist
for ARCH in arm64 amd64; do
docker buildx build --platform=linux/$ARCH -f Dockerfile.build . -t builder:$ARCH --load
docker create --platform linux/$ARCH --name builder builder:$ARCH
docker cp builder:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$ARCH
docker rm builder
for TARGETARCH in arm64 amd64; do
docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
docker rm builder-$TARGETARCH
done

View File

@@ -4,157 +4,240 @@
set -eu
check_os() {
if [ "$(uname -s)" != "Linux" ]; then
echo "This script is intended to run on Linux only."
exit 1
fi
}
status() { echo ">>> $*" >&2; }
error() { echo "ERROR $*"; exit 1; }
warning() { echo "WARNING: $*"; }
determine_architecture() {
ARCH=$(uname -m)
case $ARCH in
x86_64)
ARCH_SUFFIX="amd64"
;;
aarch64|arm64)
ARCH_SUFFIX="arm64"
;;
*)
echo "Unsupported architecture: $ARCH"
exit 1
;;
esac
}
TEMP_DIR=$(mktemp -d)
cleanup() { rm -rf $TEMP_DIR; }
trap cleanup EXIT
check_sudo() {
if [ "$(id -u)" -ne 0 ]; then
if command -v sudo >/dev/null 2>&1; then
SUDO_CMD="sudo"
echo "Downloading the ollama executable to the PATH, this will require sudo permissions."
else
echo "Error: sudo is not available. Please run as root or install sudo."
exit 1
available() { command -v $1 >/dev/null; }
require() {
local MISSING=''
for TOOL in $*; do
if ! available $TOOL; then
MISSING="$MISSING $TOOL"
fi
else
SUDO_CMD=""
fi
done
echo $MISSING
}
install_cuda_drivers() {
local os_name os_version
if [ -f "/etc/os-release" ]; then
. /etc/os-release
os_name=$ID
os_version=$VERSION_ID
else
echo "Unable to detect operating system. Skipping CUDA installation."
return 1
[ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
case "$(uname -m)" in
x86_64) ARCH="amd64" ;;
aarch64|arm64) ARCH="arm64" ;;
*) error "Unsupported architecture: $ARCH" ;;
esac
SUDO=
if [ "$(id -u)" -ne 0 ]; then
# Not running as root, so sudo is required
if ! available sudo; then
error "This script requires superuser permissions. Please re-run as root."
fi
# based on https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#package-manager-installation
case $os_name in
CentOS)
$SUDO_CMD yum install yum-utils
$SUDO_CMD yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
$SUDO_CMD yum clean all
$SUDO_CMD yum -y install nvidia-driver-latest-dkms
$SUDO_CMD yum -y install cuda-driver
$SUDO_CMD yum install kernel-devel-$(uname -r) kernel-headers-$(uname -r)
$SUDO_CMD dkms status | awk -F: '/added/ { print $1 }' | xargs -n1 $SUDO_CMD dkms install
$SUDO_CMD modprobe nvidia
;;
ubuntu)
case $os_version in
20.04)
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
;;
22.04)
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
;;
*)
echo "Skipping automatic CUDA installation, not supported for Ubuntu ($os_version)."
return
;;
esac
$SUDO_CMD dpkg -i cuda-keyring_1.1-1_all.deb
$SUDO_CMD apt-get update
$SUDO_CMD apt-get -y install cuda-drivers
;;
RedHatEnterprise*|Kylin|Fedora|SLES|openSUSE*|Microsoft|Debian)
echo "NVIDIA CUDA drivers may not be installed, you can install them from: https://developer.nvidia.com/cuda-downloads"
;;
*)
echo "Unsupported or unknown distribution, skipping GPU CUDA driver install: $os_name"
;;
esac
}
SUDO="sudo"
fi
check_install_cuda_drivers() {
if lspci -d '10de:' | grep 'NVIDIA' >/dev/null; then
# NVIDIA Corporation [10de] device is available
if command -v nvidia-smi >/dev/null 2>&1; then
CUDA_VERSION=$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")
if [ -z "$CUDA_VERSION" ]; then
echo "Warning: NVIDIA-SMI is available, but the CUDA version cannot be detected. Installing CUDA drivers..."
install_cuda_drivers
else
echo "Detected CUDA version $CUDA_VERSION"
fi
else
echo "Warning: NVIDIA GPU detected but NVIDIA-SMI is not available. Installing CUDA drivers..."
install_cuda_drivers
fi
else
echo "No NVIDIA GPU detected. Skipping driver installation."
fi
}
NEEDS=$(require curl awk grep sed tee xargs)
if [ -n "$NEEDS" ]; then
status "ERROR: The following tools are required but missing:"
for NEED in $NEEDS; do
echo " - $NEED"
done
exit 1
fi
download_ollama() {
$SUDO_CMD mkdir -p /usr/bin
$SUDO_CMD curl -fsSL -o /usr/bin/ollama "https://ollama.ai/download/latest/ollama-linux-$ARCH_SUFFIX"
}
status "Downloading ollama..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.ai/download/ollama-linux-$ARCH"
for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue
done
status "Installing ollama to $BINDIR..."
$SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
install_success() { status 'Install complete. Run "ollama" from the command line.'; }
trap install_success EXIT
# Everything from this point onwards is optional.
configure_systemd() {
if command -v systemctl >/dev/null 2>&1; then
$SUDO_CMD useradd -r -s /bin/false -m -d /home/ollama ollama 2>/dev/null
if ! id ollama >/dev/null 2>&1; then
status "Creating ollama user..."
$SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama
fi
echo "Creating systemd service file for ollama..."
cat <<EOF | $SUDO_CMD tee /etc/systemd/system/ollama.service >/dev/null
status "Creating ollama systemd service..."
cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/bin/ollama serve
ExecStart=$BINDIR/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/home/ollama"
Environment="HOME=/usr/share/ollama"
Environment="PATH=$PATH"
[Install]
WantedBy=default.target
EOF
echo "Reloading systemd and enabling ollama service..."
if [ "$(systemctl is-system-running || echo 'not running')" = 'running' ]; then
$SUDO_CMD systemctl daemon-reload
$SUDO_CMD systemctl enable ollama
$SUDO_CMD systemctl restart ollama
fi
else
echo "Run 'ollama serve' from the command line to start the service."
fi
SYSTEMCTL_RUNNING="$(systemctl is-system-running || true)"
case $SYSTEMCTL_RUNNING in
running|degraded)
status "Enabling and starting ollama service..."
$SUDO systemctl daemon-reload
$SUDO systemctl enable ollama
start_service() { $SUDO systemctl restart ollama; }
trap start_service EXIT
;;
esac
}
main() {
check_os
determine_architecture
check_sudo
download_ollama
if available systemctl; then
configure_systemd
check_install_cuda_drivers
echo "Installation complete. You can now run 'ollama' from the command line."
fi
if ! available lspci && ! available lshw; then
warning "Unable to detect NVIDIA GPU. Install lspci or lshw to automatically detect and install NVIDIA CUDA drivers."
exit 0
fi
# check_gpu METHOD
# Probes for an NVIDIA GPU using the named METHOD and returns 1 when the probe
# fails:
#   lspci      - lspci must be available and list a device for vendor id 10de
#                whose line contains "NVIDIA"
#   lshw       - lshw must be available and (run via $SUDO) report a display
#                device whose vendor line contains [10DE]
#   nvidia-smi - only checks that the nvidia-smi command is available
check_gpu() {
case $1 in
lspci) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;;
lshw) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;;
nvidia-smi) available nvidia-smi || return 1 ;;
esac
}
main
if check_gpu nvidia-smi; then
status "NVIDIA GPU installed."
exit 0
fi
if ! check_gpu lspci && ! check_gpu lshw; then
warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
exit 0
fi
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-7-centos-7
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-8-rocky-8
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#rhel-9-rocky-9
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#fedora
# install_cuda_driver_yum DISTRO VERSION
# Adds the NVIDIA CUDA repository for DISTRO ($1) and VERSION ($2) and installs
# the cuda-drivers package. Reads the globals $PACKAGE_MANAGER (yum or dnf) and
# $SUDO.
install_cuda_driver_yum() {
status 'Installing NVIDIA repository...'
# The repository URL is built from $1$2 and the machine architecture.
case $PACKAGE_MANAGER in
yum)
# yum-utils is installed first, then yum-config-manager registers the repo
$SUDO $PACKAGE_MANAGER -y install yum-utils
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
;;
dnf)
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo
;;
esac
case $1 in
rhel)
status 'Installing EPEL repository...'
# EPEL is required for third-party dependencies such as dkms and libvdpau
# "|| true" keeps the script going under "set -e" if this install fails
$SUDO $PACKAGE_MANAGER -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-$2.noarch.rpm || true
;;
esac
status 'Installing CUDA driver...'
# centos and rhel7 additionally get the nvidia-driver-latest-dkms package
if [ "$1" = 'centos' ] || [ "$1$2" = 'rhel7' ]; then
$SUDO $PACKAGE_MANAGER -y install nvidia-driver-latest-dkms
fi
$SUDO $PACKAGE_MANAGER -y install cuda-drivers
}
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#ubuntu
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
# install_cuda_driver_apt DISTRO VERSION
# Downloads the NVIDIA cuda-keyring package for DISTRO ($1) and VERSION ($2),
# installs it, and then installs cuda-drivers through apt-get.
# Reads the globals $TEMP_DIR and $SUDO ($SUDO is empty when already root).
install_cuda_driver_apt() {
    status 'Installing NVIDIA repository...'
    curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb

    case $1 in
        debian)
            status 'Enabling contrib sources...'
            # Write through $SUDO rather than a hard-coded "sudo" so this also
            # works when the script runs as root on a system without sudo.
            $SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
            ;;
    esac

    status 'Installing CUDA driver...'
    $SUDO dpkg -i $TEMP_DIR/cuda-keyring.deb
    $SUDO apt-get update

    # -E is added so DEBIAN_FRONTEND set below is preserved across sudo
    [ -n "$SUDO" ] && SUDO_E="$SUDO -E" || SUDO_E=
    DEBIAN_FRONTEND=noninteractive $SUDO_E apt-get -y install cuda-drivers -q
}
if [ ! -f "/etc/os-release" ]; then
error "Unknown distribution. Skipping CUDA installation."
fi
. /etc/os-release
OS_NAME=$ID
OS_VERSION=$VERSION_ID
PACKAGE_MANAGER=
for PACKAGE_MANAGER in dnf yum apt-get; do
if available $PACKAGE_MANAGER; then
break
fi
done
if [ -z "$PACKAGE_MANAGER" ]; then
error "Unknown package manager. Skipping CUDA installation."
fi
if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
case $OS_NAME in
centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
amzn) install_cuda_driver_yum 'fedora' '35' ;;
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
*) exit ;;
esac
fi
if ! lsmod | grep -q nvidia; then
KERNEL_RELEASE="$(uname -r)"
case $OS_NAME in
centos|rhel|rocky|amzn) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE kernel-headers-$KERNEL_RELEASE ;;
fedora) $SUDO $PACKAGE_MANAGER -y install kernel-devel-$KERNEL_RELEASE ;;
debian|ubuntu) $SUDO apt-get -y install linux-headers-$KERNEL_RELEASE ;;
*) exit ;;
esac
NVIDIA_CUDA_VERSION=$($SUDO dkms status | awk -F: '/added/ { print $1 }')
if [ -n "$NVIDIA_CUDA_VERSION" ]; then
$SUDO dkms install $NVIDIA_CUDA_VERSION
fi
if lsmod | grep -q nouveau; then
status 'Reboot to complete NVIDIA CUDA driver install.'
exit 0
fi
$SUDO modprobe nvidia
fi
status "NVIDIA CUDA drivers installed."

View File

@@ -46,8 +46,8 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
// we already have the file, so return
opts.fn(api.ProgressResponse{
Digest: opts.digest,
Total: int(fi.Size()),
Completed: int(fi.Size()),
Total: fi.Size(),
Completed: fi.Size(),
})
return nil
@@ -93,8 +93,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
// successful download while monitoring
opts.fn(api.ProgressResponse{
Digest: f.Digest,
Total: int(fi.Size()),
Completed: int(fi.Size()),
Total: fi.Size(),
Completed: fi.Size(),
})
return true, false, nil
}
@@ -109,8 +109,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", f.Digest),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
return false, false, nil
}()
@@ -129,8 +129,8 @@ func monitorDownload(ctx context.Context, opts downloadOpts, f *FileDownload) er
}
var (
chunkSize = 1024 * 1024 // 1 MiB in bytes
errDownload = fmt.Errorf("download failed")
chunkSize int64 = 1024 * 1024 // 1 MiB in bytes
errDownload = fmt.Errorf("download failed")
)
// doDownload downloads a blob from the registry and stores it in the blobs directory
@@ -147,7 +147,7 @@ func doDownload(ctx context.Context, opts downloadOpts, f *FileDownload) error {
default:
size = fi.Size()
// Ensure the size is divisible by the chunk size by removing excess bytes
size -= size % int64(chunkSize)
size -= size % chunkSize
err := os.Truncate(f.FilePath+"-partial", size)
if err != nil {
@@ -200,8 +200,8 @@ outerLoop:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", f.Digest),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
if f.Completed >= f.Total {
@@ -213,8 +213,8 @@ outerLoop:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("error renaming file: %v", err),
Digest: f.Digest,
Total: int(f.Total),
Completed: int(f.Completed),
Total: f.Total,
Completed: f.Completed,
})
return err
}
@@ -223,7 +223,7 @@ outerLoop:
}
}
n, err := io.CopyN(out, resp.Body, int64(chunkSize))
n, err := io.CopyN(out, resp.Body, chunkSize)
if err != nil && !errors.Is(err, io.EOF) {
return fmt.Errorf("%w: %w", errDownload, err)
}

View File

@@ -54,6 +54,54 @@ type Model struct {
Embeddings []vector.Embedding
}
// ChatPrompt renders a chat history into a single prompt string using the
// model's template.
//
// Messages are folded into template variables in order: a "system" message
// sets System, a "user" message sets Prompt, and an "assistant" message is
// appended verbatim after the pending variables are rendered. Pending
// variables are rendered through the template whenever a new value would
// overwrite a non-empty one, before every assistant message, and once more
// after the last message. Messages with any other role are ignored.
//
// It returns an error if the template fails to parse or to execute.
func (m *Model) ChatPrompt(messages []api.Message) (string, error) {
	tmpl, err := template.New("").Parse(m.Template)
	if err != nil {
		return "", err
	}

	var vars struct {
		System string
		Prompt string
		First  bool
	}

	// NOTE(review): First is never cleared, so every rendered turn sees
	// First == true; confirm whether it should be reset after the first flush.
	vars.First = true

	var sb strings.Builder

	// flush renders the pending variables into sb and clears them. The
	// template error is propagated instead of being silently dropped.
	flush := func() error {
		if err := tmpl.Execute(&sb, vars); err != nil {
			return err
		}

		vars.System = ""
		vars.Prompt = ""
		return nil
	}

	// build the chat history from messages
	for _, msg := range messages {
		switch msg.Role {
		case "system":
			if vars.System != "" {
				if err := flush(); err != nil {
					return "", err
				}
			}

			vars.System = msg.Content
		case "user":
			if vars.Prompt != "" {
				if err := flush(); err != nil {
					return "", err
				}
			}

			vars.Prompt = msg.Content
		case "assistant":
			if err := flush(); err != nil {
				return "", err
			}

			sb.WriteString(msg.Content)
		}
	}

	if err := flush(); err != nil {
		return "", err
	}

	return sb.String(), nil
}
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
t := m.Template
if request.Template != "" {
@@ -103,7 +151,7 @@ type ManifestV2 struct {
type Layer struct {
MediaType string `json:"mediaType"`
Digest string `json:"digest"`
Size int `json:"size"`
Size int64 `json:"size"`
From string `json:"from,omitempty"`
}
@@ -129,11 +177,11 @@ type RootFS struct {
DiffIDs []string `json:"diff_ids"`
}
func (m *ManifestV2) GetTotalSize() int {
var total int
func (m *ManifestV2) GetTotalSize() (total int64) {
for _, layer := range m.Layers {
total += layer.Size
}
total += m.Config.Size
return total
}
@@ -649,8 +697,8 @@ func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error)
e.fn(api.ProgressResponse{
Status: fmt.Sprintf("creating embeddings for file %s", filePath),
Digest: fileDigest,
Total: len(data) - 1,
Completed: i,
Total: int64(len(data) - 1),
Completed: int64(i),
})
if len(existing[d]) > 0 {
// already have an embedding for this line
@@ -675,7 +723,7 @@ func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error)
Layer: Layer{
MediaType: "application/vnd.ollama.image.embed",
Digest: digest,
Size: r.Len(),
Size: r.Size(),
},
Reader: r,
}
@@ -1005,6 +1053,39 @@ func PruneLayers() error {
return nil
}
// PruneDirectory walks the tree rooted at path depth-first and removes every
// directory that is empty once its own children have been pruned, including
// path itself. Regular files and symbolic links are left untouched.
//
// It returns the first error encountered while inspecting, listing, or
// removing an entry.
func PruneDirectory(path string) error {
	fi, err := os.Lstat(path)
	if err != nil {
		return err
	}

	// Only real directories are candidates; anything else is kept as-is.
	if !fi.IsDir() || fi.Mode()&os.ModeSymlink != 0 {
		return nil
	}

	children, err := os.ReadDir(path)
	if err != nil {
		return err
	}

	for _, child := range children {
		if err := PruneDirectory(filepath.Join(path, child.Name())); err != nil {
			return err
		}
	}

	// List again: pruning the children may have emptied this directory.
	remaining, err := os.ReadDir(path)
	if err != nil {
		return err
	}

	if len(remaining) == 0 {
		return os.Remove(path)
	}

	return nil
}
func DeleteModel(name string) error {
mp := ParseModelPath(name)
manifest, _, err := GetManifest(mp)
@@ -1356,14 +1437,14 @@ func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
}
// GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer
func GetSHA256Digest(r io.Reader) (string, int) {
func GetSHA256Digest(r io.Reader) (string, int64) {
h := sha256.New()
n, err := io.Copy(h, r)
if err != nil {
log.Fatal(err)
}
return fmt.Sprintf("sha256:%x", h.Sum(nil)), int(n)
return fmt.Sprintf("sha256:%x", h.Sum(nil)), n
}
// Function to check if a blob already exists in the Docker registry

View File

@@ -4,9 +4,9 @@ import "testing"
func TestParseModelPath(t *testing.T) {
tests := []struct {
name string
arg string
want ModelPath
name string
arg string
want ModelPath
}{
{
"full path https",

View File

@@ -156,6 +156,54 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
return nil
}
// ChatModelHandler serves a chat request: it binds the JSON body to an
// api.ChatRequest, resolves the requested model, renders the message history
// into a prompt, loads the model, and runs a prediction. The generated text is
// accumulated and returned as one assistant message in an api.ChatResponse.
//
// loaded.mu is held for the whole duration of the handler. Every failure
// writes a JSON error body and returns without writing anything further.
func ChatModelHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.ChatRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	prompt, err := model.ChatPrompt(req.Messages)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// collect the generated response chunks into a single string
	var response string
	fn := func(r api.GenerateResponse) {
		response += r.Response
	}

	workDir := c.GetString("workDir")
	if err := load(c.Request.Context(), workDir, model, nil, defaultSessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// NOTE(review): this prints the full prompt to stdout and looks like
	// leftover debug output; consider removing it before release.
	fmt.Println(prompt)

	if err := loaded.llm.Predict(c.Request.Context(), []int{}, prompt, fn); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		// stop here so a success body is not written after the error body
		return
	}

	c.JSON(http.StatusOK, api.ChatResponse{
		Message: api.Message{
			Role:    "assistant",
			Content: response,
		},
		CreatedAt: time.Now().UTC(),
	})
}
func GenerateHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
@@ -223,7 +271,8 @@ func GenerateHandler(c *gin.Context) {
ch <- r
}
if req.Prompt == "" {
// an empty request loads the model
if req.Prompt == "" && req.Template == "" && req.System == "" {
ch <- api.GenerateResponse{Model: req.Model, Done: true}
} else {
if err := loaded.llm.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
@@ -291,8 +340,6 @@ func PullModelHandler(c *gin.Context) {
regOpts := &RegistryOptions{
Insecure: req.Insecure,
Username: req.Username,
Password: req.Password,
}
ctx, cancel := context.WithCancel(c.Request.Context())
@@ -322,8 +369,6 @@ func PushModelHandler(c *gin.Context) {
regOpts := &RegistryOptions{
Insecure: req.Insecure,
Username: req.Username,
Password: req.Password,
}
ctx := context.Background()
@@ -377,6 +422,18 @@ func DeleteModelHandler(c *gin.Context) {
}
return
}
manifestsPath, err := GetManifestPath()
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
if err := PruneDirectory(manifestsPath); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.JSON(http.StatusOK, nil)
}
@@ -543,6 +600,7 @@ func Serve(ln net.Listener, allowOrigins []string) error {
},
)
r.POST("/api/chat", ChatModelHandler)
r.POST("/api/pull", PullModelHandler)
r.POST("/api/generate", GenerateHandler)
r.POST("/api/embeddings", EmbeddingHandler)
@@ -601,6 +659,7 @@ func streamResponse(c *gin.Context, ch chan any) {
return false
}
// Delineate chunks with new-line delimiter
bts = append(bts, '\n')
if _, err := w.Write(bts); err != nil {
log.Printf("streamResponse: w.Write failed with %s", err)

View File

@@ -15,8 +15,8 @@ import (
)
const (
redirectChunkSize = 1024 * 1024 * 1024
regularChunkSize = 95 * 1024 * 1024
redirectChunkSize int64 = 1024 * 1024 * 1024
regularChunkSize int64 = 95 * 1024 * 1024
)
func startUpload(ctx context.Context, mp ModelPath, layer *Layer, regOpts *RegistryOptions) (*url.URL, int64, error) {
@@ -48,7 +48,7 @@ func startUpload(ctx context.Context, mp ModelPath, layer *Layer, regOpts *Regis
return nil, 0, err
}
return locationURL, int64(chunkSize), nil
return locationURL, chunkSize, nil
}
func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSize int64, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
@@ -73,10 +73,10 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
fn: fn,
}
for offset := int64(0); offset < int64(layer.Size); {
chunk := int64(layer.Size) - offset
if chunk > int64(chunkSize) {
chunk = int64(chunkSize)
for offset := int64(0); offset < layer.Size; {
chunk := layer.Size - offset
if chunk > chunkSize {
chunk = chunkSize
}
resp, err := uploadBlobChunk(ctx, http.MethodPatch, requestURL, f, offset, chunk, regOpts, &pw)
@@ -85,7 +85,7 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
Status: fmt.Sprintf("error uploading chunk: %v", err),
Digest: layer.Digest,
Total: layer.Size,
Completed: int(offset),
Completed: offset,
})
return err
@@ -127,7 +127,7 @@ func uploadBlob(ctx context.Context, requestURL *url.URL, layer *Layer, chunkSiz
}
func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r io.ReaderAt, offset, limit int64, opts *RegistryOptions, pw *ProgressWriter) (*http.Response, error) {
sectionReader := io.NewSectionReader(r, int64(offset), limit)
sectionReader := io.NewSectionReader(r, offset, limit)
headers := make(http.Header)
headers.Set("Content-Type", "application/octet-stream")
@@ -152,7 +152,7 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
return nil, err
}
pw.completed = int(offset)
pw.completed = offset
if _, err := uploadBlobChunk(ctx, http.MethodPut, location, r, offset, limit, nil, pw); err != nil {
// retry
log.Printf("retrying redirected upload: %v", err)
@@ -170,7 +170,7 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
opts.Token = token
pw.completed = int(offset)
pw.completed = offset
sectionReader = io.NewSectionReader(r, offset, limit)
continue
case resp.StatusCode >= http.StatusBadRequest:
@@ -187,19 +187,19 @@ func uploadBlobChunk(ctx context.Context, method string, requestURL *url.URL, r
type ProgressWriter struct {
status string
digest string
bucket int
completed int
total int
bucket int64
completed int64
total int64
fn func(api.ProgressResponse)
}
func (pw *ProgressWriter) Write(b []byte) (int, error) {
n := len(b)
pw.bucket += n
pw.completed += n
pw.bucket += int64(n)
// throttle status updates to not spam the client
if pw.bucket >= 1024*1024 || pw.completed >= pw.total {
if pw.bucket >= 1024*1024 || pw.completed+pw.bucket >= pw.total {
pw.completed += pw.bucket
pw.fn(api.ProgressResponse{
Status: pw.status,
Digest: pw.digest,