mirror of https://github.com/ollama/ollama.git (synced 2025-12-27 17:50:53 -05:00)

Compare commits (7 commits):

- 1852755154
- b1f7123301
- 1fbf3585d6
- 99d5161e8a
- ea8380be45
- 4f25092dc1
- 4fc10acce9
```diff
@@ -217,6 +217,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
 - [HTML UI](https://github.com/rtcfirefly/ollama-ui)
 - [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
 - [Dumbar](https://github.com/JerrySievert/Dumbar)
```
api/types.go (60 changed lines)
```diff
@@ -31,22 +31,6 @@ func (e StatusError) Error() string {
     }
 }
 
-// /api/chat
-type Message struct {
-    Role    string `json:"role"`
-    Content string `json:"content"`
-}
-
-type ChatRequest struct {
-    Model    string    `json:"model"`
-    Messages []Message `json:"messages"`
-}
-
-type ChatResponse struct {
-    CreatedAt time.Time `json:"created_at"`
-    Message   Message   `json:"message"`
-}
-
 type GenerateRequest struct {
     Model  string `json:"model"`
     Prompt string `json:"prompt"`
```
```diff
@@ -296,38 +280,38 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 
 func DefaultOptions() Options {
     return Options{
-        Seed: -1,
-
-        UseNUMA: false,
-
-        NumCtx:             2048,
-        NumKeep:            -1,
-        NumBatch:           512,
-        NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
-        NumGQA:             1,
-        LowVRAM:            false,
-        F16KV:              true,
-        UseMMap:            true,
-        UseMLock:           false,
-        RopeFrequencyBase:  10000.0,
-        RopeFrequencyScale: 1.0,
-        EmbeddingOnly:      true,
-
-        RepeatLastN:      64,
-        RepeatPenalty:    1.1,
-        FrequencyPenalty: 0.0,
-        PresencePenalty:  0.0,
-
-        NumThread: 0, // let the runtime decide
+        // options set on request to runner
+        NumPredict:       -1,
+        NumKeep:          -1,
+        Temperature:      0.8,
+        TopK:             40,
+        TopP:             0.9,
+        TFSZ:             1.0,
+        TypicalP:         1.0,
+        RepeatLastN:      64,
+        RepeatPenalty:    1.1,
+        PresencePenalty:  0.0,
+        FrequencyPenalty: 0.0,
+        Mirostat:         0,
+        MirostatTau:      5.0,
+        MirostatEta:      0.1,
+        PenalizeNewline:  true,
+        Seed:             -1,
+
+        // options set when the model is loaded
+        NumCtx:             2048,
+        RopeFrequencyBase:  10000.0,
+        RopeFrequencyScale: 1.0,
+        NumBatch:           512,
+        NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
+        NumGQA:             1,
+        NumThread:          0, // let the runtime decide
+        LowVRAM:            false,
+        F16KV:              true,
+        UseMLock:           false,
+        UseMMap:            true,
+        UseNUMA:            false,
+        EmbeddingOnly:      true,
     }
 }
```
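The regrouped defaults separate the options relayed per request to the runner from the options fixed when the model is loaded. Below is a rough sketch of how those defaults might be combined with per-request overrides through the FromMap method named in the hunk header; the import path, option keys, and values are assumptions for illustration, not taken from this diff.

```go
package main

import (
	"fmt"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// Start from the package defaults, then layer request-supplied options on
	// top, mirroring how a caller of Options.FromMap might apply overrides.
	opts := api.DefaultOptions()
	overrides := map[string]interface{}{
		"temperature": 0.2,  // assumed key for the "set on request to runner" group
		"num_ctx":     4096, // assumed key for the "set when the model is loaded" group
	}
	if err := opts.FromMap(overrides); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", opts)
}
```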
cmd/cmd.go (85 changed lines)
```diff
@@ -380,7 +380,20 @@ func pull(model string, insecure bool) error {
 func RunGenerate(cmd *cobra.Command, args []string) error {
     if len(args) > 1 {
         // join all args into a single prompt
-        return generate(cmd, args[0], strings.Join(args[1:], " "))
+        wordWrap := false
+        if term.IsTerminal(int(os.Stdout.Fd())) {
+            wordWrap = true
+        }
+
+        nowrap, err := cmd.Flags().GetBool("nowordwrap")
+        if err != nil {
+            return err
+        }
+        if nowrap {
+            wordWrap = false
+        }
+
+        return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap)
     }
 
     if readline.IsTerminal(int(os.Stdin.Fd())) {
@@ -392,7 +405,7 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
 
 type generateContextKey string
 
-func generate(cmd *cobra.Command, model, prompt string) error {
+func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
     client, err := api.FromEnv()
     if err != nil {
         return err
@@ -408,24 +421,9 @@ func generate(cmd *cobra.Command, model, prompt string) error {
         generateContext = []int{}
     }
 
-    var wrapTerm bool
-    termType := os.Getenv("TERM")
-    if termType == "xterm-256color" {
-        wrapTerm = true
-    }
-
     termWidth, _, err := term.GetSize(int(0))
     if err != nil {
-        wrapTerm = false
+        wordWrap = false
     }
 
-    // override wrapping if the user turned it off
-    nowrap, err := cmd.Flags().GetBool("nowordwrap")
-    if err != nil {
-        return err
-    }
-    if nowrap {
-        wrapTerm = false
-    }
-
     cancelCtx, cancel := context.WithCancel(context.Background())
@@ -452,7 +450,7 @@ func generate(cmd *cobra.Command, model, prompt string) error {
 
         latest = response
 
-        if wrapTerm {
+        if wordWrap {
             for _, ch := range response.Response {
                 if currentLineLength+1 > termWidth-5 {
                     // backtrack the length of the last word and clear to the end of the line
@@ -533,7 +531,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
     }
 
     // load the model
-    if err := generate(cmd, model, ""); err != nil {
+    if err := generate(cmd, model, "", false); err != nil {
         return err
     }
 
@@ -579,6 +577,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
     }
     defer scanner.Close()
 
+    var wordWrap bool
+    termType := os.Getenv("TERM")
+    if termType == "xterm-256color" {
+        wordWrap = true
+    }
+
+    // override wrapping if the user turned it off
+    nowrap, err := cmd.Flags().GetBool("nowordwrap")
+    if err != nil {
+        return err
+    }
+    if nowrap {
+        wordWrap = false
+    }
+
     var multiLineBuffer string
     var isMultiLine bool
 
@@ -632,10 +645,10 @@ func generateInteractive(cmd *cobra.Command, model string) error {
         case "nohistory":
             scanner.HistoryDisable()
         case "wordwrap":
-            cmd.Flags().Set("nowordwrap", "false")
+            wordWrap = true
             fmt.Println("Set 'wordwrap' mode.")
         case "nowordwrap":
-            cmd.Flags().Set("nowordwrap", "true")
+            wordWrap = false
             fmt.Println("Set 'nowordwrap' mode.")
         case "verbose":
             cmd.Flags().Set("verbose", "true")
@@ -673,15 +686,31 @@ func generateInteractive(cmd *cobra.Command, model string) error {
         switch args[1] {
         case "license":
-            fmt.Println(resp.License)
+            if resp.License == "" {
+                fmt.Println("No license was specified for this model.\n")
+            } else {
+                fmt.Println(resp.License)
+            }
         case "modelfile":
             fmt.Println(resp.Modelfile)
         case "parameters":
-            fmt.Println(resp.Parameters)
+            if resp.Parameters == "" {
+                fmt.Println("No parameters were specified for this model.\n")
+            } else {
+                fmt.Println(resp.Parameters)
+            }
         case "system":
-            fmt.Println(resp.System)
+            if resp.System == "" {
+                fmt.Println("No system prompt was specified for this model.\n")
+            } else {
+                fmt.Println(resp.System)
+            }
         case "template":
-            fmt.Println(resp.Template)
+            if resp.Template == "" {
+                fmt.Println("No prompt template was specified for this model.\n")
+            } else {
+                fmt.Println(resp.Template)
+            }
         default:
             fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
         }
@@ -698,7 +727,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
         }
 
         if len(line) > 0 && line[0] != '/' {
-            if err := generate(cmd, model, line); err != nil {
+            if err := generate(cmd, model, line, wordWrap); err != nil {
                 return err
             }
         }
@@ -710,7 +739,7 @@ func generateBatch(cmd *cobra.Command, model string) error {
     for scanner.Scan() {
         prompt := scanner.Text()
         fmt.Printf(">>> %s\n", prompt)
-        if err := generate(cmd, model, prompt); err != nil {
+        if err := generate(cmd, model, prompt, false); err != nil {
             return err
         }
     }
```
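The hunks above move the word-wrap decision out of generate() and into its callers, while the wrapping itself still keys off the terminal width (termWidth-5) and backtracks the last partial word. Here is a minimal standalone sketch of that wrapping idea; the function name, escape sequences, and sample input are illustrative and not the repository's code.

```go
package main

import "fmt"

// wrapStream echoes streamed chunks while wrapping at word boundaries: when a
// character would push the line past the width limit, erase the partial word,
// start a new line, and reprint the word there.
func wrapStream(chunks []string, termWidth int) {
	var lineLength int
	var word string

	for _, chunk := range chunks {
		for _, ch := range chunk {
			if lineLength+1 > termWidth-5 {
				// backtrack the length of the last word and clear to the end of the line
				fmt.Printf("\x1b[%dD\x1b[K\n%s", len(word), word)
				lineLength = len(word)
			}
			fmt.Print(string(ch))
			lineLength++
			switch ch {
			case ' ':
				word = ""
			case '\n':
				lineLength = 0
				word = ""
			default:
				word += string(ch)
			}
		}
	}
	fmt.Println()
}

func main() {
	wrapStream([]string{"streaming responses ", "wrap at word ", "boundaries instead of mid-word"}, 24)
}
```

Run in a terminal, long output breaks before the width limit instead of splitting a word across two lines, which is the behavior the `wordWrap` flag toggles.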
````diff
@@ -10,25 +10,25 @@ Install required tools:
 - go version 1.20 or higher
 - gcc version 11.4.0 or higher
 
-```
+```bash
 brew install go cmake gcc
 ```
 
 Get the required libraries:
 
-```
+```bash
 go generate ./...
 ```
 
 Then build ollama:
 
-```
+```bash
 go build .
 ```
 
 Now you can run `ollama`:
 
-```
+```bash
 ./ollama
 ```
````
````diff
@@ -2,13 +2,13 @@
 ## How can I expose the Ollama server?
 
-```
+```bash
 OLLAMA_HOST=0.0.0.0:11435 ollama serve
 ```
 
 By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
 
-```
+```bash
 OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
 ```
@@ -16,4 +16,3 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
 
 * macOS: Raw model data is stored under `~/.ollama/models`.
 * Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
````
````diff
@@ -2,7 +2,7 @@
 > Note: A one line installer for Ollama is available by running:
 >
-> ```
+> ```bash
 > curl https://ollama.ai/install.sh | sh
 > ```
@@ -10,7 +10,7 @@
 Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
 
-```
+```bash
 sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
 sudo chmod +x /usr/bin/ollama
 ```
@@ -19,13 +19,13 @@ sudo chmod +x /usr/bin/ollama
 Start Ollama by running `ollama serve`:
 
-```
+```bash
 ollama serve
 ```
 
 Once Ollama is running, run a model in another terminal session:
 
-```
+```bash
 ollama run llama2
 ```
@@ -35,7 +35,7 @@ ollama run llama2
 Verify that the drivers are installed by running the following command, which should print details about your GPU:
 
-```
+```bash
 nvidia-smi
 ```
@@ -43,7 +43,7 @@ nvidia-smi
 Create a user for Ollama:
 
-```
+```bash
 sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
 ```
@@ -68,7 +68,7 @@ WantedBy=default.target
 Then start the service:
 
-```
+```bash
 sudo systemctl daemon-reload
 sudo systemctl enable ollama
 ```
@@ -77,7 +77,7 @@ sudo systemctl enable ollama
 To view logs of Ollama running as a startup service, run:
 
-```
+```bash
 journalctl -u ollama
 ```
````
````diff
@@ -44,7 +44,7 @@ INSTRUCTION arguments
 An example of a model file creating a mario blueprint:
 
-```
+```modelfile
 FROM llama2
 # sets the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@@ -70,13 +70,13 @@ More examples are available in the [examples directory](../examples).
 The FROM instruction defines the base model to use when creating a model.
 
-```
+```modelfile
 FROM <model name>:<tag>
 ```
 
 #### Build from llama2
 
-```
+```modelfile
 FROM llama2
 ```
@@ -85,7 +85,7 @@ A list of available base models:
 #### Build from a bin file
 
-```
+```modelfile
 FROM ./ollama-model.bin
 ```
@@ -95,7 +95,7 @@ This bin file location should be specified as an absolute path or relative to th
 The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
 
-```
+```modelfile
 FROM <model name>:<tag>
 EMBED <file path>.txt
 EMBED <different file path>.txt
@@ -106,7 +106,7 @@ EMBED <path to directory>/*.txt
 The `PARAMETER` instruction defines a parameter that can be set when the model is run.
 
-```
+```modelfile
 PARAMETER <parameter> <parametervalue>
 ```
@@ -142,7 +142,7 @@ PARAMETER <parameter> <parametervalue>
 | `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
 | `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
 
-```
+```modelfile
 TEMPLATE """
 {{- if .First }}
 ### System:
@@ -162,7 +162,7 @@ SYSTEM """<system message>"""
 The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
 
-```
+```modelfile
 SYSTEM """<system message>"""
 ```
@@ -170,7 +170,7 @@ SYSTEM """<system message>"""
 The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
 
-```
+```modelfile
 ADAPTER ./ollama-lora.bin
 ```
@@ -178,7 +178,7 @@ ADAPTER ./ollama-lora.bin
 The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
 
-```
+```modelfile
 LICENSE """
 <license text>
 """
````
llm/llama.go (51 changed lines)
```diff
@@ -218,7 +218,6 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
     if opts.NumGPU != -1 {
         return opts.NumGPU
     }
-    n := 1 // default to enable metal on macOS
     if runtime.GOOS == "linux" {
         vramMib, err := CheckVRAM()
         if err != nil {
@@ -235,10 +234,11 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
         // TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
         bytesPerLayer := fileSizeBytes / numLayer
 
-        // set n to the max number of layers we can fit in VRAM
-        return int(totalVramBytes / bytesPerLayer)
-
-        log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, n)
+        // max number of layers we can fit in VRAM
+        layers := int(totalVramBytes / bytesPerLayer)
+        log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
+
+        return layers
     }
     // default to enable metal on macOS
     return 1
```
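For intuition about the heuristic above, here is a small worked example; the model size, layer count, and VRAM figures are made up for illustration and do not come from the diff.

```go
package main

import "fmt"

func main() {
	// Hypothetical inputs: a ~3.8 GB model file with 32 layers and 6 GiB of
	// usable VRAM, run through the same arithmetic as NumGPU above.
	fileSizeBytes := int64(3_800_000_000)
	numLayer := int64(32)
	totalVramBytes := int64(6) << 30

	bytesPerLayer := fileSizeBytes / numLayer
	layers := int(totalVramBytes / bytesPerLayer)
	fmt.Printf("%d bytes per layer, up to %d GPU layers\n", bytesPerLayer, layers)
	// Prints: 118750000 bytes per layer, up to 54 GPU layers
}
```

In this hypothetical case the estimate (54) exceeds the 32 layers the model actually has, so the whole model would be offloaded to the GPU.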
```diff
@@ -417,28 +417,25 @@ type Prediction struct {
 }
 
 type PredictRequest struct {
-    Stream           bool            `json:"stream"`
-    NPredict         int             `json:"n_predict,omitempty"`
-    TopK             int             `json:"top_k,omitempty"`
-    TopP             float32         `json:"top_p,omitempty"`
-    TfsZ             float32         `json:"tfs_z,omitempty"`
-    TypicalP         float32         `json:"typical_p,omitempty"`
-    RepeatLastN      int             `json:"repeat_last_n,omitempty"`
-    Temperature      float32         `json:"temperature,omitempty"`
-    RepeatPenalty    float32         `json:"repeat_penalty,omitempty"`
-    PresencePenalty  float32         `json:"presence_penalty,omitempty"`
-    FrequencyPenalty float32         `json:"frequency_penalty,omitempty"`
-    Mirostat         int             `json:"mirostat,omitempty"`
-    MirostatTau      float32         `json:"mirostat_tau,omitempty"`
-    MirostatEta      float32         `json:"mirostat_eta,omitempty"`
-    PenalizeNl       bool            `json:"penalize_nl,omitempty"`
-    NKeep            int             `json:"n_keep,omitempty"`
-    Seed             int             `json:"seed,omitempty"`
-    Prompt           string          `json:"prompt,omitempty"`
-    NProbs           int             `json:"n_probs,omitempty"`
-    LogitBias        map[int]float32 `json:"logit_bias,omitempty"`
-    IgnoreEos        bool            `json:"ignore_eos,omitempty"`
-    Stop             []string        `json:"stop,omitempty"`
+    Prompt           string   `json:"prompt"`
+    Stream           bool     `json:"stream"`
+    NPredict         int      `json:"n_predict"`
+    NKeep            int      `json:"n_keep"`
+    Temperature      float32  `json:"temperature"`
+    TopK             int      `json:"top_k"`
+    TopP             float32  `json:"top_p"`
+    TfsZ             float32  `json:"tfs_z"`
+    TypicalP         float32  `json:"typical_p"`
+    RepeatLastN      int      `json:"repeat_last_n"`
+    RepeatPenalty    float32  `json:"repeat_penalty"`
+    PresencePenalty  float32  `json:"presence_penalty"`
+    FrequencyPenalty float32  `json:"frequency_penalty"`
+    Mirostat         int      `json:"mirostat"`
+    MirostatTau      float32  `json:"mirostat_tau"`
+    MirostatEta      float32  `json:"mirostat_eta"`
+    PenalizeNl       bool     `json:"penalize_nl"`
+    Seed             int      `json:"seed"`
+    Stop             []string `json:"stop,omitempty"`
 }
 
 func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
@@ -470,8 +467,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
         MirostatTau: llm.MirostatTau,
         MirostatEta: llm.MirostatEta,
         PenalizeNl:  llm.PenalizeNewline,
+        Seed:        llm.Seed,
+        Stop:        llm.Stop,
     }
 
     data, err := json.Marshal(predReq)
     if err != nil {
         return fmt.Errorf("error marshaling data: %v", err)
```
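The PredictRequest change drops `omitempty` from nearly every field, so zero-valued options are serialized explicitly in the payload sent to the runner instead of being silently omitted. A toy illustration of the difference follows; the struct names are invented for the example and are not the repository's types.

```go
package main

import (
	"encoding/json"
	"fmt"
)

type withOmitempty struct {
	Temperature float32 `json:"temperature,omitempty"`
}

type withoutOmitempty struct {
	Temperature float32 `json:"temperature"`
}

func main() {
	// With omitempty, a zero value disappears from the JSON entirely; without
	// it, the zero is sent explicitly and cannot be confused with "unset".
	a, _ := json.Marshal(withOmitempty{Temperature: 0})
	b, _ := json.Marshal(withoutOmitempty{Temperature: 0})
	fmt.Println(string(a)) // {}
	fmt.Println(string(b)) // {"temperature":0}
}
```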
scripts/build_docker.sh (0 changed lines, Normal file → Executable file)
```diff
@@ -54,54 +54,6 @@ type Model struct {
     Embeddings []vector.Embedding
 }
 
-func (m *Model) ChatPrompt(messages []api.Message) (string, error) {
-    tmpl, err := template.New("").Parse(m.Template)
-    if err != nil {
-        return "", err
-    }
-
-    var vars struct {
-        System string
-        Prompt string
-        First  bool
-    }
-
-    vars.First = true
-
-    var sb strings.Builder
-    flush := func() {
-        tmpl.Execute(&sb, vars)
-        vars.System = ""
-        vars.Prompt = ""
-    }
-
-    // build the chat history from messages
-    for _, m := range messages {
-        if m.Role == "system" {
-            if vars.System != "" {
-                flush()
-            }
-            vars.System = m.Content
-        }
-
-        if m.Role == "user" {
-            if vars.Prompt != "" {
-                flush()
-            }
-            vars.Prompt = m.Content
-        }
-
-        if m.Role == "assistant" {
-            flush()
-            sb.Write([]byte(m.Content))
-        }
-    }
-
-    flush()
-
-    return sb.String(), nil
-}
-
 func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
     t := m.Template
     if request.Template != "" {
```
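The removed ChatPrompt renders chat history by accumulating a system/user pair, flushing it through the model template whenever the next message would overwrite an occupied slot, and appending assistant replies verbatim. Below is a self-contained sketch of that pattern; the template string and messages are invented for illustration and omit the `.First` variable the real templates use.

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

type message struct{ Role, Content string }

func main() {
	// An invented prompt template standing in for a model's TEMPLATE.
	tmpl := template.Must(template.New("").Parse(
		"{{ if .System }}### System:\n{{ .System }}\n{{ end }}### User:\n{{ .Prompt }}\n### Response:\n"))

	messages := []message{
		{"system", "You are terse."},
		{"user", "Why is the sky blue?"},
		{"assistant", "Rayleigh scattering."},
		{"user", "Thanks!"},
	}

	var vars struct{ System, Prompt string }
	var sb strings.Builder

	// flush renders the accumulated system/user pair and resets it.
	flush := func() {
		tmpl.Execute(&sb, vars)
		vars.System, vars.Prompt = "", ""
	}

	for _, m := range messages {
		switch m.Role {
		case "system":
			if vars.System != "" {
				flush()
			}
			vars.System = m.Content
		case "user":
			if vars.Prompt != "" {
				flush()
			}
			vars.Prompt = m.Content
		case "assistant":
			// Assistant turns are flushed first, then written back verbatim.
			flush()
			sb.WriteString(m.Content)
		}
	}
	flush()

	fmt.Println(sb.String())
}
```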
```diff
@@ -156,54 +156,6 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
     return nil
 }
 
-func ChatModelHandler(c *gin.Context) {
-    loaded.mu.Lock()
-    defer loaded.mu.Unlock()
-
-    var req api.ChatRequest
-    if err := c.ShouldBindJSON(&req); err != nil {
-        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-        return
-    }
-
-    model, err := GetModel(req.Model)
-    if err != nil {
-        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-        return
-    }
-
-    prompt, err := model.ChatPrompt(req.Messages)
-    if err != nil {
-        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-        return
-    }
-
-    var response string
-    fn := func(r api.GenerateResponse) {
-        response += r.Response
-    }
-
-    workDir := c.GetString("workDir")
-    if err := load(c.Request.Context(), workDir, model, nil, defaultSessionDuration); err != nil {
-        c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-        return
-    }
-
-    fmt.Println(prompt)
-
-    if err := loaded.llm.Predict(c.Request.Context(), []int{}, prompt, fn); err != nil {
-        c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-    }
-
-    c.JSON(http.StatusOK, api.ChatResponse{
-        Message: api.Message{
-            Role:    "assistant",
-            Content: response,
-        },
-        CreatedAt: time.Now().UTC(),
-    })
-}
-
 func GenerateHandler(c *gin.Context) {
     loaded.mu.Lock()
     defer loaded.mu.Unlock()
@@ -600,7 +552,6 @@ func Serve(ln net.Listener, allowOrigins []string) error {
         },
     )
 
-    r.POST("/api/chat", ChatModelHandler)
     r.POST("/api/pull", PullModelHandler)
     r.POST("/api/generate", GenerateHandler)
     r.POST("/api/embeddings", EmbeddingHandler)
```
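These commits also unregister the /api/chat route along with its handler. For reference, here is a hedged sketch of what a client call to the removed endpoint looked like, built only from the ChatRequest and ChatResponse shapes deleted in api/types.go; the model name and message content are placeholders, and the endpoint no longer exists after these changes.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Request body matching the removed api.ChatRequest: a model name plus a
	// list of role/content messages.
	body, _ := json.Marshal(map[string]interface{}{
		"model": "llama2",
		"messages": []map[string]string{
			{"role": "user", "content": "Why is the sky blue?"},
		},
	})

	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Response body matching the removed api.ChatResponse.
	var out struct {
		Message struct {
			Role    string `json:"role"`
			Content string `json:"content"`
		} `json:"message"`
		CreatedAt string `json:"created_at"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Message.Content)
}
```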