Compare commits

7 Commits
api ... v0.1.1

Author SHA1 Message Date
Patrick Devine
1852755154 show a default message when license/parameters/system prompt/template aren't specified (#681) 2023-10-02 14:34:52 -07:00
Bruce MacDonald
b1f7123301 clean up num_gpu calculation code (#673) 2023-10-02 14:53:42 -04:00
Bruce MacDonald
1fbf3585d6 Relay default values to llama runner (#672)
* include seed in params for llama.cpp server and remove empty filter for temp

* relay default predict options to llama.cpp

- reorganize options to match predict request for readability

* omit empty stop

---------

Co-authored-by: hallh <hallh@users.noreply.github.com>
2023-10-02 14:53:16 -04:00
Patrick Devine
99d5161e8a don't wordwrap when stdout is redirected or piped (#662) 2023-10-02 11:50:55 -07:00
Michael
ea8380be45 add community project: Chatbot Ollama
add community project: Chatbot Ollama by @ivanfioravanti
2023-10-02 09:04:31 -07:00
Jeffrey Morgan
4f25092dc1 fix build_docker.sh permissions 2023-10-01 16:42:32 -07:00
Jiayu Liu
4fc10acce9 add some missing code directives in docs (#664) 2023-10-01 11:51:01 -07:00
11 changed files with 129 additions and 214 deletions

View File

@@ -217,6 +217,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)

View File

@@ -31,22 +31,6 @@ func (e StatusError) Error() string {
}
}
// /api/chat
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
}
type ChatResponse struct {
CreatedAt time.Time `json:"created_at"`
Message Message `json:"message"`
}
type GenerateRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -296,38 +280,38 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
func DefaultOptions() Options {
return Options{
Seed: -1,
UseNUMA: false,
NumCtx: 2048,
NumKeep: -1,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
LowVRAM: false,
F16KV: true,
UseMMap: true,
UseMLock: false,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
EmbeddingOnly: true,
RepeatLastN: 64,
RepeatPenalty: 1.1,
FrequencyPenalty: 0.0,
PresencePenalty: 0.0,
// options set on request to runner
NumPredict: -1,
NumKeep: -1,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,
TFSZ: 1.0,
TypicalP: 1.0,
RepeatLastN: 64,
RepeatPenalty: 1.1,
PresencePenalty: 0.0,
FrequencyPenalty: 0.0,
Mirostat: 0,
MirostatTau: 5.0,
MirostatEta: 0.1,
PenalizeNewline: true,
Seed: -1,
NumThread: 0, // let the runtime decide
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
}
}
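
The reorganized defaults above are grouped by when they take effect: per-request sampling options first, then options fixed when the model is loaded. A minimal sketch of layering a couple of overrides on top of DefaultOptions; the field names come from the diff, but the import path is an assumption for this point in the project's history, and FromMap (named in the hunk header) is only mentioned, not exercised:

```go
package main

import (
	"fmt"

	"github.com/jmorganca/ollama/api" // assumed module path at this time
)

func main() {
	opts := api.DefaultOptions()
	opts.Temperature = 0.2 // request-time sampling option
	opts.NumCtx = 4096     // load-time option

	// FromMap, referenced in the hunk header, presumably does the same for
	// map[string]interface{} input such as options decoded from JSON.
	fmt.Printf("temperature=%v num_ctx=%v seed=%v\n",
		opts.Temperature, opts.NumCtx, opts.Seed)
}
```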

View File

@@ -380,7 +380,20 @@ func pull(model string, insecure bool) error {
func RunGenerate(cmd *cobra.Command, args []string) error {
if len(args) > 1 {
// join all args into a single prompt
return generate(cmd, args[0], strings.Join(args[1:], " "))
wordWrap := false
if term.IsTerminal(int(os.Stdout.Fd())) {
wordWrap = true
}
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap)
}
if readline.IsTerminal(int(os.Stdin.Fd())) {
@@ -392,7 +405,7 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
type generateContextKey string
func generate(cmd *cobra.Command, model, prompt string) error {
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
client, err := api.FromEnv()
if err != nil {
return err
@@ -408,24 +421,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
generateContext = []int{}
}
var wrapTerm bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wrapTerm = true
}
termWidth, _, err := term.GetSize(int(0))
if err != nil {
wrapTerm = false
}
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wrapTerm = false
wordWrap = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
@@ -452,7 +450,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
latest = response
if wrapTerm {
if wordWrap {
for _, ch := range response.Response {
if currentLineLength+1 > termWidth-5 {
// backtrack the length of the last word and clear to the end of the line
@@ -533,7 +531,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
// load the model
if err := generate(cmd, model, ""); err != nil {
if err := generate(cmd, model, "", false); err != nil {
return err
}
@@ -579,6 +577,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
defer scanner.Close()
var wordWrap bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wordWrap = true
}
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
var multiLineBuffer string
var isMultiLine bool
@@ -632,10 +645,10 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "nohistory":
scanner.HistoryDisable()
case "wordwrap":
cmd.Flags().Set("nowordwrap", "false")
wordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
cmd.Flags().Set("nowordwrap", "true")
wordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
cmd.Flags().Set("verbose", "true")
@@ -673,15 +686,31 @@ func generateInteractive(cmd *cobra.Command, model string) error {
switch args[1] {
case "license":
fmt.Println(resp.License)
if resp.License == "" {
fmt.Println("No license was specified for this model.\n")
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
if resp.Parameters == "" {
fmt.Println("No parameters were specified for this model.\n")
} else {
fmt.Println(resp.Parameters)
}
case "system":
fmt.Println(resp.System)
if resp.System == "" {
fmt.Println("No system prompt was specified for this model.\n")
} else {
fmt.Println(resp.System)
}
case "template":
fmt.Println(resp.Template)
if resp.Template == "" {
fmt.Println("No prompt template was specified for this model.\n")
} else {
fmt.Println(resp.Template)
}
default:
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
@@ -698,7 +727,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line); err != nil {
if err := generate(cmd, model, line, wordWrap); err != nil {
return err
}
}
@@ -710,7 +739,7 @@ func generateBatch(cmd *cobra.Command, model string) error {
for scanner.Scan() {
prompt := scanner.Text()
fmt.Printf(">>> %s\n", prompt)
if err := generate(cmd, model, prompt); err != nil {
if err := generate(cmd, model, prompt, false); err != nil {
return err
}
}
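
The word-wrap change in this file (commit 99d5161e8a) boils down to one check: wrapping defaults to on only when stdout is an interactive terminal. A standalone sketch of that check, using what is presumably the golang.org/x/term package behind the diff's term.IsTerminal call:

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/term"
)

func main() {
	// Mirrors the diff above: word wrap is enabled only when stdout is a
	// terminal, so redirected or piped output is left unwrapped.
	wordWrap := term.IsTerminal(int(os.Stdout.Fd()))
	fmt.Println("word wrap enabled:", wordWrap)
}
```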

View File

@@ -10,25 +10,25 @@ Install required tools:
- go version 1.20 or higher
- gcc version 11.4.0 or higher
```
```bash
brew install go cmake gcc
```
Get the required libraries:
```
```bash
go generate ./...
```
Then build ollama:
```
```bash
go build .
```
Now you can run `ollama`:
```
```bash
./ollama
```

View File

@@ -2,13 +2,13 @@
## How can I expose the Ollama server?
```
```bash
OLLAMA_HOST=0.0.0.0:11435 ollama serve
```
By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
```
```bash
OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
```
@@ -16,4 +16,3 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
* macOS: Raw model data is stored under `~/.ollama/models`.
* Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`

View File

@@ -2,7 +2,7 @@
> Note: A one line installer for Ollama is available by running:
>
> ```
> ```bash
> curl https://ollama.ai/install.sh | sh
> ```
@@ -10,7 +10,7 @@
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
@@ -19,13 +19,13 @@ sudo chmod +x /usr/bin/ollama
Start Ollama by running `ollama serve`:
```
```bash
ollama serve
```
Once Ollama is running, run a model in another terminal session:
```
```bash
ollama run llama2
```
@@ -35,7 +35,7 @@ ollama run llama2
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```
```bash
nvidia-smi
```
@@ -43,7 +43,7 @@ nvidia-smi
Create a user for Ollama:
```
```bash
sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
```
@@ -68,7 +68,7 @@ WantedBy=default.target
Then start the service:
```
```bash
sudo systemctl daemon-reload
sudo systemctl enable ollama
```
@@ -77,7 +77,7 @@ sudo systemctl enable ollama
To view logs of Ollama running as a startup service, run:
```
```bash
journalctl -u ollama
```

View File

@@ -44,7 +44,7 @@ INSTRUCTION arguments
An example of a model file creating a mario blueprint:
```
```modelfile
FROM llama2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
@@ -70,13 +70,13 @@ More examples are available in the [examples directory](../examples).
The FROM instruction defines the base model to use when creating a model.
```
```modelfile
FROM <model name>:<tag>
```
#### Build from llama2
```
```modelfile
FROM llama2
```
@@ -85,7 +85,7 @@ A list of available base models:
#### Build from a bin file
```
```modelfile
FROM ./ollama-model.bin
```
@@ -95,7 +95,7 @@ This bin file location should be specified as an absolute path or relative to th
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
```modelfile
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
@@ -106,7 +106,7 @@ EMBED <path to directory>/*.txt
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
```
```modelfile
PARAMETER <parameter> <parametervalue>
```
@@ -142,7 +142,7 @@ PARAMETER <parameter> <parametervalue>
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```
```modelfile
TEMPLATE """
{{- if .First }}
### System:
@@ -162,7 +162,7 @@ SYSTEM """<system message>"""
The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
```
```modelfile
SYSTEM """<system message>"""
```
@@ -170,7 +170,7 @@ SYSTEM """<system message>"""
The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
```
```modelfile
ADAPTER ./ollama-lora.bin
```
@@ -178,7 +178,7 @@ ADAPTER ./ollama-lora.bin
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
```
```modelfile
LICENSE """
<license text>
"""

View File

@@ -218,7 +218,6 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
n := 1 // default to enable metal on macOS
if runtime.GOOS == "linux" {
vramMib, err := CheckVRAM()
if err != nil {
@@ -235,10 +234,11 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
bytesPerLayer := fileSizeBytes / numLayer
// set n to the max number of layers we can fit in VRAM
return int(totalVramBytes / bytesPerLayer)
// max number of layers we can fit in VRAM
layers := int(totalVramBytes / bytesPerLayer)
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, n)
return layers
}
// default to enable metal on macOS
return 1
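
To make the heuristic above concrete with hypothetical numbers: a 3.8 GiB model file split across 32 layers comes to roughly 119 MiB per layer, so 8192 MiB of reported VRAM allows about 68 layers, more than the model has, and every layer can be offloaded. The same arithmetic as a sketch:

```go
package main

import "fmt"

func main() {
	// Hypothetical inputs for the bytes-per-layer heuristic shown above.
	fileSizeBytes := int64(3800) * 1024 * 1024 // ~3.8 GiB model file
	numLayer := int64(32)
	vramMib := int64(8192) // as reported by CheckVRAM on Linux

	bytesPerLayer := fileSizeBytes / numLayer
	totalVramBytes := vramMib * 1024 * 1024
	layers := int(totalVramBytes / bytesPerLayer)
	fmt.Printf("%d MiB VRAM available, loading up to %d GPU layers\n", vramMib, layers)
	// Prints: 8192 MiB VRAM available, loading up to 68 GPU layers
}
```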
@@ -417,28 +417,25 @@ type Prediction struct {
}
type PredictRequest struct {
Stream bool `json:"stream"`
NPredict int `json:"n_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TfsZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNl bool `json:"penalize_nl,omitempty"`
NKeep int `json:"n_keep,omitempty"`
Seed int `json:"seed,omitempty"`
Prompt string `json:"prompt,omitempty"`
NProbs int `json:"n_probs,omitempty"`
LogitBias map[int]float32 `json:"logit_bias,omitempty"`
IgnoreEos bool `json:"ignore_eos,omitempty"`
Stop []string `json:"stop,omitempty"`
Prompt string `json:"prompt"`
Stream bool `json:"stream"`
NPredict int `json:"n_predict"`
NKeep int `json:"n_keep"`
Temperature float32 `json:"temperature"`
TopK int `json:"top_k"`
TopP float32 `json:"top_p"`
TfsZ float32 `json:"tfs_z"`
TypicalP float32 `json:"typical_p"`
RepeatLastN int `json:"repeat_last_n"`
RepeatPenalty float32 `json:"repeat_penalty"`
PresencePenalty float32 `json:"presence_penalty"`
FrequencyPenalty float32 `json:"frequency_penalty"`
Mirostat int `json:"mirostat"`
MirostatTau float32 `json:"mirostat_tau"`
MirostatEta float32 `json:"mirostat_eta"`
PenalizeNl bool `json:"penalize_nl"`
Seed int `json:"seed"`
Stop []string `json:"stop,omitempty"`
}
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
@@ -470,8 +467,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
MirostatTau: llm.MirostatTau,
MirostatEta: llm.MirostatEta,
PenalizeNl: llm.PenalizeNewline,
Seed: llm.Seed,
Stop: llm.Stop,
}
data, err := json.Marshal(predReq)
if err != nil {
return fmt.Errorf("error marshaling data: %v", err)
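
The omitempty removal above is the substance of commit 1fbf3585d6: with omitempty, zero-valued defaults were silently dropped from the request and llama.cpp fell back to its own settings; without it, Ollama's defaults are always relayed to the runner. A minimal sketch of the difference, with the field set trimmed to two hypothetical stand-ins:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Before: zero values are omitted from the serialized request.
type withOmitEmpty struct {
	Temperature float32 `json:"temperature,omitempty"`
	Seed        int     `json:"seed,omitempty"`
}

// After: every field is sent, even when it holds the zero value.
type withoutOmitEmpty struct {
	Temperature float32 `json:"temperature"`
	Seed        int     `json:"seed"`
}

func main() {
	a, _ := json.Marshal(withOmitEmpty{})
	fmt.Println(string(a)) // {}  (zero values never reach the runner)

	b, _ := json.Marshal(withoutOmitEmpty{})
	fmt.Println(string(b)) // {"temperature":0,"seed":0}  (defaults are relayed)
}
```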

scripts/build_docker.sh Normal file → Executable file
View File

View File

@@ -54,54 +54,6 @@ type Model struct {
Embeddings []vector.Embedding
}
func (m *Model) ChatPrompt(messages []api.Message) (string, error) {
tmpl, err := template.New("").Parse(m.Template)
if err != nil {
return "", err
}
var vars struct {
System string
Prompt string
First bool
}
vars.First = true
var sb strings.Builder
flush := func() {
tmpl.Execute(&sb, vars)
vars.System = ""
vars.Prompt = ""
}
// build the chat history from messages
for _, m := range messages {
if m.Role == "system" {
if vars.System != "" {
flush()
}
vars.System = m.Content
}
if m.Role == "user" {
if vars.Prompt != "" {
flush()
}
vars.Prompt = m.Content
}
if m.Role == "assistant" {
flush()
sb.Write([]byte(m.Content))
}
}
flush()
return sb.String(), nil
}
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
t := m.Template
if request.Template != "" {
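
The ChatPrompt method removed above builds a single prompt from a message history by accumulating system and user content into template variables and flushing them through the model template whenever a variable would be overwritten or an assistant turn appears. A standalone sketch of that flush pattern with a hypothetical template and hard-coded messages; the real code parses m.Template and takes []api.Message:

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

func main() {
	// Hypothetical template standing in for m.Template.
	tmpl := template.Must(template.New("").Parse(
		"{{ if .System }}### System:\n{{ .System }}\n{{ end }}### User:\n{{ .Prompt }}\n### Response:\n"))

	var vars struct {
		System string
		Prompt string
		First  bool
	}
	vars.First = true

	var sb strings.Builder
	flush := func() {
		tmpl.Execute(&sb, vars)
		vars.System = ""
		vars.Prompt = ""
	}

	messages := []struct{ Role, Content string }{
		{"system", "You are a helpful assistant."},
		{"user", "Why is the sky blue?"},
		{"assistant", "Because of Rayleigh scattering."},
		{"user", "Explain it to a five year old."},
	}

	for _, m := range messages {
		switch m.Role {
		case "system":
			if vars.System != "" {
				flush() // a second system message forces a render first
			}
			vars.System = m.Content
		case "user":
			if vars.Prompt != "" {
				flush() // back-to-back user messages each get their own turn
			}
			vars.Prompt = m.Content
		case "assistant":
			flush() // render the pending turn, then append the reply verbatim
			sb.WriteString(m.Content)
		}
	}
	flush()
	fmt.Println(sb.String())
}
```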

View File

@@ -156,54 +156,6 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
return nil
}
func ChatModelHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
var req api.ChatRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
model, err := GetModel(req.Model)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
prompt, err := model.ChatPrompt(req.Messages)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
var response string
fn := func(r api.GenerateResponse) {
response += r.Response
}
workDir := c.GetString("workDir")
if err := load(c.Request.Context(), workDir, model, nil, defaultSessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
fmt.Println(prompt)
if err := loaded.llm.Predict(c.Request.Context(), []int{}, prompt, fn); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
}
c.JSON(http.StatusOK, api.ChatResponse{
Message: api.Message{
Role: "assistant",
Content: response,
},
CreatedAt: time.Now().UTC(),
})
}
func GenerateHandler(c *gin.Context) {
loaded.mu.Lock()
defer loaded.mu.Unlock()
@@ -600,7 +552,6 @@ func Serve(ln net.Listener, allowOrigins []string) error {
},
)
r.POST("/api/chat", ChatModelHandler)
r.POST("/api/pull", PullModelHandler)
r.POST("/api/generate", GenerateHandler)
r.POST("/api/embeddings", EmbeddingHandler)