Compare commits

...

4 Commits

Author SHA1 Message Date
Jeffrey Morgan
1d78d96fc6 remove .First 2023-11-15 18:07:13 -05:00
Michael Yang
686f85d6ca Merge pull request #1132 from jmorganca/mxyng/human-bytes
replace go-humanize with format.HumanBytes
2023-11-15 09:46:21 -08:00
bnodnarb
85951d25ef Created tutorial for running Ollama on NVIDIA Jetson devices (#1098) 2023-11-15 12:32:37 -05:00
Michael Yang
01ea6002c4 replace go-humanize with format.HumanBytes 2023-11-14 14:57:41 -08:00
11 changed files with 46 additions and 20 deletions

View File

@@ -20,7 +20,6 @@ import (
"syscall"
"time"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
@@ -173,7 +172,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
data = append(data, []string{m.Name, m.Digest[:12], humanize.Bytes(uint64(m.Size)), format.HumanTime(m.ModifiedAt, "Never")})
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")})
}
}

View File

@@ -385,9 +385,9 @@ curl http://localhost:11434/api/show -d '{
```json
{
"license": "<contents of license block>",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] <<SYS>>{{ .System }}<</SYS>>\n\n{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
"template": "[INST] <<SYS>>{{ .System }}<</SYS>>\n\n{{ .Prompt }} [/INST] "
}
```

View File

@@ -129,14 +129,11 @@ PARAMETER <parameter> <parametervalue>
| --------------- | ------------------------------------------------------------------------------------------------------------ |
| `{{ .System }}` | The system prompt used to specify custom behavior, this must also be set in the Modelfile as an instruction. |
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```modelfile
TEMPLATE """
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
{{ .Prompt }}

View File

@@ -4,5 +4,6 @@ Here is a list of ways you can use Ollama with other tools to build interesting
- [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md)
- [Using LangChain with Ollama in Python](./tutorials/langchainpy.md)
- [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md)
Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.
Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.

View File

@@ -0,0 +1,38 @@
# Running Ollama on NVIDIA Jetson Devices
With some minor configuration, Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/). The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack).
NVIDIA Jetson devices are Linux-based embedded AI computers that are purpose-built for AI applications.
Jetsons have an integrated GPU that is wired directly to the memory controller of the machine. For this reason, the `nvidia-smi` command is unrecognized, and Ollama proceeds to operate in "CPU only"
mode. This can be verified by using a monitoring tool like jtop.
In order to address this, we simply pass the path to the Jetson's pre-installed CUDA libraries into `ollama serve` (while in a tmux session). We then hardcode the num_gpu parameters into a cloned
version of our target model.
Prerequisites:
- curl
- tmux
Here are the steps:
- Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.ai/install.sh | sh`
- Stop the Ollama service: `sudo systemctl stop ollama`
- Start Ollama serve in a tmux session called ollama_jetson and reference the CUDA libraries path: `tmux has-session -t ollama_jetson 2>/dev/null || tmux new-session -d -s ollama_jetson
'LD_LIBRARY_PATH=/usr/local/cuda/lib64 ollama serve'`
- Pull the model you want to use (e.g. mistral): `ollama pull mistral`
- Create a new Modelfile specifically for enabling GPU support on the Jetson: `touch ModelfileMistralJetson`
- In the ModelfileMistralJetson file, specify the FROM model and the num_gpu PARAMETER as shown below:
```
FROM mistral
PARAMETER num_gpu 999
```
- Create a new model from your Modelfile: `ollama create mistral-jetson -f ./ModelfileMistralJetson`
- Run the new model: `ollama run mistral-jetson`
If you run a monitoring tool like jtop you should now see that Ollama is using the Jetson's integrated GPU.
And that's it!

View File

@@ -3,10 +3,8 @@
FROM orca
TEMPLATE """
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
I hate it when my phone dies
### Response:

View File

@@ -3,10 +3,8 @@
This is a simple sentiments analyzer using the Orca model. When you pull Orca from the registry, it has a Template already defined that looks like this:
```Modelfile
{{- if .First }}
### System:
{{ .System }}
{{- end }}
### User:
{{ .Prompt }}

View File

@@ -12,11 +12,11 @@ const (
func HumanBytes(b int64) string {
switch {
case b > GigaByte:
return fmt.Sprintf("%d GB", b/GigaByte)
return fmt.Sprintf("%.1f GB", float64(b)/GigaByte)
case b > MegaByte:
return fmt.Sprintf("%d MB", b/MegaByte)
return fmt.Sprintf("%.1f MB", float64(b)/MegaByte)
case b > KiloByte:
return fmt.Sprintf("%d KB", b/KiloByte)
return fmt.Sprintf("%.1f KB", float64(b)/KiloByte)
default:
return fmt.Sprintf("%d B", b)
}

1
go.mod
View File

@@ -3,7 +3,6 @@ module github.com/jmorganca/ollama
go 1.20
require (
github.com/dustin/go-humanize v1.0.1
github.com/emirpasic/gods v1.18.1
github.com/gin-gonic/gin v1.9.1
github.com/mattn/go-runewidth v0.0.14

2
go.sum
View File

@@ -9,8 +9,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=

View File

@@ -60,12 +60,10 @@ func (m *Model) Prompt(request api.GenerateRequest) (string, error) {
}
var vars struct {
First bool
System string
Prompt string
}
vars.First = len(request.Context) == 0
vars.System = m.System
vars.Prompt = request.Prompt