mirror of
https://github.com/ollama/ollama.git
synced 2026-01-20 05:18:31 -05:00
Compare commits
14 Commits
royh/embed
...
roy-embed-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2647a0e443 | ||
|
|
ec4c35fe99 | ||
|
|
f5e3939220 | ||
|
|
ae27d9dcfd | ||
|
|
37096790a7 | ||
|
|
997c903884 | ||
|
|
c8af3c2d96 | ||
|
|
455e61170d | ||
|
|
4de1370a9d | ||
|
|
bbf8f102ee | ||
|
|
bb46bbcf5e | ||
|
|
9b60a038e5 | ||
|
|
83a0cb8d88 | ||
|
|
ebc529cbb3 |
@@ -64,7 +64,8 @@ Here are some example models that can be downloaded:
|
||||
| LLaVA | 7B | 4.5GB | `ollama run llava` |
|
||||
| Solar | 10.7B | 6.1GB | `ollama run solar` |
|
||||
|
||||
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
> [!NOTE]
|
||||
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
|
||||
|
||||
## Customize a model
|
||||
|
||||
|
||||
256
cmd/cmd.go
256
cmd/cmd.go
@@ -2,7 +2,6 @@ package cmd
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ed25519"
|
||||
@@ -17,7 +16,6 @@ import (
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
@@ -33,11 +31,6 @@ import (
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"golang.org/x/term"
|
||||
"gonum.org/v1/gonum/mat"
|
||||
"gonum.org/v1/gonum/stat"
|
||||
"gonum.org/v1/plot"
|
||||
"gonum.org/v1/plot/plotter"
|
||||
"gonum.org/v1/plot/vg"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
@@ -377,90 +370,6 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
return generate(cmd, opts)
|
||||
}
|
||||
|
||||
func EmbedHandler(cmd *cobra.Command, args []string) error {
|
||||
interactive := true
|
||||
|
||||
opts := runOptions{
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
}
|
||||
|
||||
format, err := cmd.Flags().GetString("format")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Format = format
|
||||
|
||||
keepAlive, err := cmd.Flags().GetString("keepalive")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if keepAlive != "" {
|
||||
d, err := time.ParseDuration(keepAlive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.KeepAlive = &api.Duration{Duration: d}
|
||||
}
|
||||
|
||||
prompts := args[1:]
|
||||
// prepend stdin to the prompt if provided
|
||||
if !term.IsTerminal(int(os.Stdin.Fd())) {
|
||||
in, err := io.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prompts = append([]string{string(in)}, prompts...)
|
||||
opts.WordWrap = false
|
||||
interactive = false
|
||||
}
|
||||
opts.Prompt = strings.Join(prompts, " ")
|
||||
if len(prompts) > 0 {
|
||||
interactive = false
|
||||
}
|
||||
|
||||
nowrap, err := cmd.Flags().GetBool("nowordwrap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.WordWrap = !nowrap
|
||||
|
||||
// Fill out the rest of the options based on information about the
|
||||
// model.
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
name := args[0]
|
||||
info, err := func() (*api.ShowResponse, error) {
|
||||
showReq := &api.ShowRequest{Name: name}
|
||||
info, err := client.Show(cmd.Context(), showReq)
|
||||
var se api.StatusError
|
||||
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
}
|
||||
return info, err
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
||||
opts.ParentModel = info.Details.ParentModel
|
||||
opts.Messages = append(opts.Messages, info.Messages...)
|
||||
|
||||
if interactive {
|
||||
return generateInteractive(cmd, opts)
|
||||
}
|
||||
return embed(cmd, opts)
|
||||
}
|
||||
|
||||
func errFromUnknownKey(unknownKeyErr error) error {
|
||||
// find SSH public key in the error message
|
||||
sshKeyPattern := `ssh-\w+ [^\s"]+`
|
||||
@@ -1070,154 +979,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
|
||||
return &api.Message{Role: role, Content: fullResponse.String()}, nil
|
||||
}
|
||||
|
||||
func embed(cmd *cobra.Command, opts runOptions) error {
|
||||
line := opts.Prompt
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
|
||||
inputs := strings.Split(line, "\n\n")
|
||||
|
||||
req := &api.EmbedRequest{
|
||||
Model: opts.Model,
|
||||
Input: inputs,
|
||||
}
|
||||
|
||||
resp, err := client.Embed(cmd.Context(), req)
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get embeddings")
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings := resp.Embeddings
|
||||
|
||||
r, c := len(embeddings), len(embeddings[0])
|
||||
data := make([]float64, r*c)
|
||||
for i := range r {
|
||||
for j := range c {
|
||||
data[i*c+j] = float64(embeddings[i][j])
|
||||
}
|
||||
}
|
||||
|
||||
X := mat.NewDense(r, c, data)
|
||||
|
||||
// Initialize PCA
|
||||
var pca stat.PC
|
||||
|
||||
// Perform PCA
|
||||
if !pca.PrincipalComponents(X, nil) {
|
||||
return fmt.Errorf("PCA failed")
|
||||
}
|
||||
|
||||
// Extract principal component vectors
|
||||
var vectors mat.Dense
|
||||
pca.VectorsTo(&vectors)
|
||||
|
||||
// // Extract variances of the principal components
|
||||
// var variances []float64
|
||||
// variances = pca.VarsTo(variances)
|
||||
|
||||
W := vectors.Slice(0, c, 0, 2).(*mat.Dense)
|
||||
|
||||
// Perform PCA reduction
|
||||
var reducedData mat.Dense
|
||||
reducedData.Mul(X, W)
|
||||
|
||||
for i, s := range inputs {
|
||||
row := reducedData.RowView(i)
|
||||
fmt.Print(i+1, ". ", s, "\n")
|
||||
fmt.Printf("[%v, %v]\n\n", row.AtVec(0), row.AtVec(1))
|
||||
}
|
||||
|
||||
points := make(plotter.XYs, reducedData.RawMatrix().Rows)
|
||||
for i := range len(points) {
|
||||
row := reducedData.RowView(i)
|
||||
points[i].X = row.AtVec(0)
|
||||
points[i].Y = row.AtVec(1)
|
||||
}
|
||||
|
||||
// Create a new plot
|
||||
p := plot.New()
|
||||
|
||||
// Set plot title and axis labels
|
||||
p.Title.Text = "Embedding Map"
|
||||
|
||||
// Create a scatter plot of the points
|
||||
s, err := plotter.NewScatter(points)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
p.Add(s)
|
||||
|
||||
/// Create labels plotter and add it to the plot
|
||||
|
||||
labels := make([]string, reducedData.RawMatrix().Rows)
|
||||
for i := range len(labels) {
|
||||
labels[i] = fmt.Sprintf("%d", i+1)
|
||||
}
|
||||
|
||||
// plotter := plotter
|
||||
|
||||
l, err := plotter.NewLabels(plotter.XYLabels{XYs: points, Labels: labels})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
p.Add(l)
|
||||
|
||||
// Make the grid square
|
||||
p.X.Min = -1
|
||||
p.X.Max = 1
|
||||
p.Y.Min = -1
|
||||
p.Y.Max = 1
|
||||
|
||||
// Set the aspect ratio to be 1:1
|
||||
p.X.Tick.Marker = plot.ConstantTicks([]plot.Tick{
|
||||
{Value: -1, Label: "-1"},
|
||||
{Value: -0.5, Label: "-0.5"},
|
||||
{Value: 0, Label: "0"},
|
||||
{Value: 0.5, Label: "0.5"},
|
||||
{Value: 1, Label: "1"},
|
||||
})
|
||||
p.Y.Tick.Marker = plot.ConstantTicks([]plot.Tick{
|
||||
{Value: -1, Label: "-1"},
|
||||
{Value: -0.5, Label: "-0.5"},
|
||||
{Value: 0, Label: "0"},
|
||||
{Value: 0.5, Label: "0.5"},
|
||||
{Value: 1, Label: "1"},
|
||||
})
|
||||
|
||||
// Save the plot to a svg file
|
||||
if err := p.Save(6*vg.Inch, 6*vg.Inch, "plot.svg"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// open the plot
|
||||
open := exec.Command("open", "plot.svg")
|
||||
err = open.Run()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't open plot")
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for Enter key press
|
||||
fmt.Print("Press 'Enter' to continue")
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
_, _ = reader.ReadString('\n')
|
||||
|
||||
// close and delete the plot (defer this)
|
||||
defer func() {
|
||||
delete := exec.Command("rm", "plot.svg")
|
||||
err = delete.Run()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't delete plot")
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func generate(cmd *cobra.Command, opts runOptions) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
@@ -1486,26 +1247,11 @@ func NewCLI() *cobra.Command {
|
||||
RunE: RunHandler,
|
||||
}
|
||||
|
||||
embedCmd := &cobra.Command{
|
||||
Use: "embed MODEL [PROMPT]",
|
||||
Short: "Embed a model",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: EmbedHandler,
|
||||
}
|
||||
|
||||
runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
|
||||
runCmd.Flags().Bool("verbose", false, "Show timings for response")
|
||||
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
|
||||
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
|
||||
runCmd.Flags().String("format", "", "Response format (e.g. json)")
|
||||
|
||||
embedCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
|
||||
embedCmd.Flags().Bool("verbose", false, "Show timings for response")
|
||||
embedCmd.Flags().Bool("insecure", false, "Use an insecure registry")
|
||||
embedCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
|
||||
embedCmd.Flags().String("format", "", "Response format (e.g. json)")
|
||||
|
||||
serveCmd := &cobra.Command{
|
||||
Use: "serve",
|
||||
Aliases: []string{"start"},
|
||||
@@ -1580,7 +1326,6 @@ func NewCLI() *cobra.Command {
|
||||
copyCmd,
|
||||
deleteCmd,
|
||||
serveCmd,
|
||||
embedCmd,
|
||||
} {
|
||||
switch cmd {
|
||||
case runCmd:
|
||||
@@ -1616,7 +1361,6 @@ func NewCLI() *cobra.Command {
|
||||
psCmd,
|
||||
copyCmd,
|
||||
deleteCmd,
|
||||
embedCmd,
|
||||
)
|
||||
|
||||
return rootCmd
|
||||
|
||||
@@ -375,9 +375,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
return err
|
||||
}
|
||||
req := &api.ShowRequest{
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Options: opts.Options,
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Options: opts.Options,
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), req)
|
||||
if err != nil {
|
||||
@@ -466,7 +466,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
}
|
||||
|
||||
sb.WriteString(line)
|
||||
|
||||
default:
|
||||
sb.WriteString(line)
|
||||
}
|
||||
|
||||
119
docs/api.md
119
docs/api.md
@@ -40,6 +40,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
|
||||
|
||||
- `model`: (required) the [model name](#model-names)
|
||||
- `prompt`: the prompt to generate a response for
|
||||
- `suffix`: the text after the model response
|
||||
- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
|
||||
|
||||
Advanced parameters (optional):
|
||||
@@ -57,7 +58,8 @@ Advanced parameters (optional):
|
||||
|
||||
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
|
||||
|
||||
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
|
||||
> [!IMPORTANT]
|
||||
> It's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
|
||||
|
||||
### Examples
|
||||
|
||||
@@ -148,8 +150,44 @@ If `stream` is set to `false`, the response will be a single JSON object:
|
||||
}
|
||||
```
|
||||
|
||||
#### Request (with suffix)
|
||||
|
||||
##### Request
|
||||
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{
|
||||
"model": "codellama:code",
|
||||
"prompt": "def compute_gcd(a, b):",
|
||||
"suffix": " return result",
|
||||
"options": {
|
||||
"temperature": 0
|
||||
},
|
||||
"stream": false
|
||||
}'
|
||||
```
|
||||
|
||||
##### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "codellama:code",
|
||||
"created_at": "2024-07-22T20:47:51.147561Z",
|
||||
"response": "\n if a == 0:\n return b\n else:\n return compute_gcd(b % a, a)\n\ndef compute_lcm(a, b):\n result = (a * b) / compute_gcd(a, b)\n",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"context": [...],
|
||||
"total_duration": 1162761250,
|
||||
"load_duration": 6683708,
|
||||
"prompt_eval_count": 17,
|
||||
"prompt_eval_duration": 201222000,
|
||||
"eval_count": 63,
|
||||
"eval_duration": 953997000
|
||||
}
|
||||
```
|
||||
|
||||
#### Request (JSON mode)
|
||||
|
||||
> [!IMPORTANT]
|
||||
> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
|
||||
|
||||
##### Request
|
||||
@@ -380,12 +418,14 @@ Generate the next message in a chat with a provided model. This is a streaming e
|
||||
|
||||
- `model`: (required) the [model name](#model-names)
|
||||
- `messages`: the messages of the chat, this can be used to keep a chat memory
|
||||
- `tools`: tools for the model to use if supported. Requires `stream` to be set to `false`
|
||||
|
||||
The `message` object has the following fields:
|
||||
|
||||
- `role`: the role of the message, either `system`, `user` or `assistant`
|
||||
- `role`: the role of the message, either `system`, `user`, `assistant`, or `tool`
|
||||
- `content`: the content of the message
|
||||
- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
|
||||
- `tool_calls` (optional): a list of tools the model wants to use
|
||||
|
||||
Advanced parameters (optional):
|
||||
|
||||
@@ -622,6 +662,79 @@ curl http://localhost:11434/api/chat -d '{
|
||||
}
|
||||
```
|
||||
|
||||
#### Chat request (with tools)
|
||||
|
||||
##### Request
|
||||
|
||||
```
|
||||
curl http://localhost:11434/api/chat -d '{
|
||||
"model": "mistral",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the weather today in Paris?"
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The location to get the weather for, e.g. San Francisco, CA"
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
|
||||
"enum": ["celsius", "fahrenheit"]
|
||||
}
|
||||
},
|
||||
"required": ["location", "format"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
##### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "mistral:7b-instruct-v0.3-q4_K_M",
|
||||
"created_at": "2024-07-22T20:33:28.123648Z",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"arguments": {
|
||||
"format": "celsius",
|
||||
"location": "Paris, FR"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"done_reason": "stop",
|
||||
"done": true,
|
||||
"total_duration": 885095291,
|
||||
"load_duration": 3753500,
|
||||
"prompt_eval_count": 122,
|
||||
"prompt_eval_duration": 328493000,
|
||||
"eval_count": 33,
|
||||
"eval_duration": 552222000
|
||||
}
|
||||
```
|
||||
|
||||
## Create a Model
|
||||
|
||||
```shell
|
||||
@@ -1173,4 +1286,4 @@ curl http://localhost:11434/api/embeddings -d '{
|
||||
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
|
||||
]
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Ollama Model File
|
||||
|
||||
> Note: `Modelfile` syntax is in development
|
||||
> [!NOTE]
|
||||
> `Modelfile` syntax is in development
|
||||
|
||||
A model file is the blueprint to create and share models with Ollama.
|
||||
|
||||
|
||||
@@ -78,8 +78,8 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] Streaming
|
||||
- [x] JSON mode
|
||||
- [x] Reproducible outputs
|
||||
- [x] Tools (streaming support coming soon)
|
||||
- [ ] Vision
|
||||
- [ ] Function calling
|
||||
- [ ] Logprobs
|
||||
|
||||
#### Supported request fields
|
||||
@@ -97,9 +97,9 @@ curl http://localhost:11434/v1/chat/completions \
|
||||
- [x] `temperature`
|
||||
- [x] `top_p`
|
||||
- [x] `max_tokens`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `tools`
|
||||
- [x] `tools`
|
||||
- [ ] `tool_choice`
|
||||
- [ ] `logit_bias`
|
||||
- [ ] `user`
|
||||
- [ ] `n`
|
||||
|
||||
|
||||
173
docs/template.md
Normal file
173
docs/template.md
Normal file
@@ -0,0 +1,173 @@
|
||||
# Template
|
||||
|
||||
Ollama provides a powerful templating engine backed by Go's built-in templating engine to construct prompts for your large language model. This feature is a valuable tool to get the most out of your models.
|
||||
|
||||
## Basic Template Structure
|
||||
|
||||
A basic Go template consists of three main parts:
|
||||
|
||||
* **Layout**: The overall structure of the template.
|
||||
* **Variables**: Placeholders for dynamic data that will be replaced with actual values when the template is rendered.
|
||||
* **Functions**: Custom functions or logic that can be used to manipulate the template's content.
|
||||
|
||||
Here's an example of a simple chat template:
|
||||
|
||||
```gotmpl
|
||||
{{- range .Messages }}
|
||||
{{ .Role }}: {{ .Content }}
|
||||
{{- end }}
|
||||
```
|
||||
|
||||
In this example, we have:
|
||||
|
||||
* A basic messages structure (layout)
|
||||
* Three variables: `Messages`, `Role`, and `Content` (variables)
|
||||
* A custom function (action) that iterates over an array of items (`range .Messages`) and displays each item
|
||||
|
||||
## Adding templates to your model
|
||||
|
||||
By default, models imported into Ollama have a default template of `{{ .Prompt }}`, i.e. user inputs are sent verbatim to the LLM. This is appropriate for text or code completion models but lacks essential markers for chat or instruction models.
|
||||
|
||||
Omitting a template in these models puts the responsibility of correctly templating input onto the user. Adding a template allows users to easily get the best results from the model.
|
||||
|
||||
To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3.
|
||||
|
||||
```dockerfile
|
||||
FROM llama3
|
||||
|
||||
TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
{{ .System }}<|eot_id|>
|
||||
{{- end }}
|
||||
{{- range .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
|
||||
|
||||
{{ .Content }}<|eot_id|>
|
||||
{{- end }}<|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
"""
|
||||
```
|
||||
|
||||
## Variables
|
||||
|
||||
`System` (string): system prompt
|
||||
|
||||
`Prompt` (string): user prompt
|
||||
|
||||
`Response` (string): assistant response
|
||||
|
||||
`Suffix` (string): text inserted after the assistant's response
|
||||
|
||||
`Messages` (list): list of messages
|
||||
|
||||
`Messages[].Role` (string): role which can be one of `system`, `user`, `assistant`, or `tool`
|
||||
|
||||
`Messages[].Content` (string): message content
|
||||
|
||||
`Messages[].ToolCalls` (list): list of tools the model wants to call
|
||||
|
||||
`Messages[].ToolCalls[].Function` (object): function to call
|
||||
|
||||
`Messages[].ToolCalls[].Function.Name` (string): function name
|
||||
|
||||
`Messages[].ToolCalls[].Function.Arguments` (map): mapping of argument name to argument value
|
||||
|
||||
`Tools` (list): list of tools the model can access
|
||||
|
||||
`Tools[].Type` (string): schema type. `type` is always `function`
|
||||
|
||||
`Tools[].Function` (object): function definition
|
||||
|
||||
`Tools[].Function.Name` (string): function name
|
||||
|
||||
`Tools[].Function.Description` (string): function description
|
||||
|
||||
`Tools[].Function.Parameters` (object): function parameters
|
||||
|
||||
`Tools[].Function.Parameters.Type` (string): schema type. `type` is always `object`
|
||||
|
||||
`Tools[].Function.Parameters.Required` (list): list of required properties
|
||||
|
||||
`Tools[].Function.Parameters.Properties` (map): mapping of property name to property definition
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Type` (string): property type
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Description` (string): property description
|
||||
|
||||
`Tools[].Function.Parameters.Properties[].Enum` (list): list of valid values
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
Keep the following tips and best practices in mind when working with Go templates:
|
||||
|
||||
* **Be mindful of dot**: Control flow structures like `range` and `with` changes the value `.`
|
||||
* **Out-of-scope variables**: Use `$.` to reference variables not currently in scope, starting from the root
|
||||
* **Whitespace control**: Use `-` to trim leading (`{{-`) and trailing (`-}}`) whitespace
|
||||
|
||||
## Examples
|
||||
|
||||
### Example Messages
|
||||
|
||||
#### ChatML
|
||||
|
||||
ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
|
||||
|
||||
```gotmpl
|
||||
{{- if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
{{ end }}
|
||||
{{- range .Messages }}<|im_start|>{{ .Role }}
|
||||
{{ .Content }}<|im_end|>
|
||||
{{ end }}<|im_start|>assistant
|
||||
{{ else }}
|
||||
{{ if .System }}<|im_start|>system
|
||||
{{ .System }}<|im_end|>
|
||||
```
|
||||
|
||||
### Example Tools
|
||||
|
||||
Tools support can be added to a model by adding a `{{ .Tools }}` node to the template. This feature is useful for models trained to call external tools and can a powerful tool for retrieving real-time data or performing complex tasks.
|
||||
|
||||
#### Mistral
|
||||
|
||||
Mistral v0.3 and Mixtral 8x22B supports tool calling.
|
||||
|
||||
```gotmpl
|
||||
{{- range $index, $_ := .Messages }}
|
||||
{{- if eq .Role "user" }}
|
||||
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS] {{ json $.Tools }}[/AVAILABLE_TOOLS]
|
||||
{{- end }}[INST] {{ if and (eq (len (slice $.Messages $index)) 1) $.System }}{{ $.System }}
|
||||
|
||||
{{ end }}{{ .Content }}[/INST]
|
||||
{{- else if eq .Role "assistant" }}
|
||||
{{- if .Content }} {{ .Content }}</s>
|
||||
{{- else if .ToolCalls }}[TOOL_CALLS] [
|
||||
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ json .Function.Arguments }}}
|
||||
{{- end }}]</s>
|
||||
{{- end }}
|
||||
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}}[/TOOL_RESULTS]
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
```
|
||||
|
||||
### Example Fill-in-Middle
|
||||
|
||||
Fill-in-middle support can be added to a model by adding a `{{ .Suffix }}` node to the template. This feature is useful for models that are trained to generate text in the middle of user input, such as code completion models.
|
||||
|
||||
#### CodeLlama
|
||||
|
||||
CodeLlama [7B](https://ollama.com/library/codellama:7b-code) and [13B](https://ollama.com/library/codellama:13b-code) code completion models support fill-in-middle.
|
||||
|
||||
```gotmpl
|
||||
<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> CodeLlama 34B and 70B code completion and all instruct and Python fine-tuned models do not support fill-in-middle.
|
||||
|
||||
#### Codestral
|
||||
|
||||
Codestral [22B](https://ollama.com/library/codestral:22b) supports fill-in-middle.
|
||||
|
||||
```gotmpl
|
||||
[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
|
||||
```
|
||||
20
go.mod
20
go.mod
@@ -25,45 +25,25 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
gioui.org v0.2.0 // indirect
|
||||
gioui.org/cpu v0.0.0-20220412190645-f1e9e8c3b1f7 // indirect
|
||||
gioui.org/shader v1.0.6 // indirect
|
||||
gioui.org/x v0.2.0 // indirect
|
||||
git.sr.ht/~sbinet/gg v0.5.0 // indirect
|
||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b // indirect
|
||||
github.com/andybalholm/stroke v0.0.0-20221221101821-bd29b49d73f0 // indirect
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/campoy/embedmd v1.0.0 // indirect
|
||||
github.com/chewxy/hm v1.0.0 // indirect
|
||||
github.com/chewxy/math32 v1.10.1 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/gizak/termui/v3 v3.1.0 // indirect
|
||||
github.com/go-fonts/liberation v0.3.2 // indirect
|
||||
github.com/go-latex/latex v0.0.0-20231108140139-5c1ce85aa4ea // indirect
|
||||
github.com/go-pdf/fpdf v0.9.0 // indirect
|
||||
github.com/go-text/typesetting v0.0.0-20230803102845-24e03d8b5372 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
|
||||
github.com/google/flatbuffers v24.3.25+incompatible // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect
|
||||
github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/exp/shiny v0.0.0-20230801115018-d63ba01acd4b // indirect
|
||||
golang.org/x/image v0.14.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gonum.org/v1/gonum v0.15.0 // indirect
|
||||
gonum.org/v1/plot v0.14.0 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
gorgonia.org/vecf64 v0.9.0 // indirect
|
||||
rsc.io/pdf v0.1.1 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
45
go.sum
45
go.sum
@@ -2,28 +2,11 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
|
||||
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
|
||||
gioui.org v0.2.0 h1:RbzDn1h/pCVf/q44ImQSa/J3MIFpY3OWphzT/Tyei+w=
|
||||
gioui.org v0.2.0/go.mod h1:1H72sKEk/fNFV+l0JNeM2Dt3co3Y4uaQcD+I+/GQ0e4=
|
||||
gioui.org/cpu v0.0.0-20210808092351-bfe733dd3334/go.mod h1:A8M0Cn5o+vY5LTMlnRoK3O5kG+rH0kWfJjeKd9QpBmQ=
|
||||
gioui.org/cpu v0.0.0-20220412190645-f1e9e8c3b1f7 h1:tNJdnP5CgM39PRc+KWmBRRYX/zJ+rd5XaYxY5d5veqA=
|
||||
gioui.org/cpu v0.0.0-20220412190645-f1e9e8c3b1f7/go.mod h1:A8M0Cn5o+vY5LTMlnRoK3O5kG+rH0kWfJjeKd9QpBmQ=
|
||||
gioui.org/shader v1.0.6 h1:cvZmU+eODFR2545X+/8XucgZdTtEjR3QWW6W65b0q5Y=
|
||||
gioui.org/shader v1.0.6/go.mod h1:mWdiME581d/kV7/iEhLmUgUK5iZ09XR5XpduXzbePVM=
|
||||
gioui.org/x v0.2.0 h1:/MbdjKH19F16auv19UiQxli2n6BYPw7eyh9XBOTgmEw=
|
||||
gioui.org/x v0.2.0/go.mod h1:rCGN2nZ8ZHqrtseJoQxCMZpt2xrZUrdZ2WuMRLBJmYs=
|
||||
git.sr.ht/~sbinet/gg v0.5.0 h1:6V43j30HM623V329xA9Ntq+WJrMjDxRjuAB1LFWF5m8=
|
||||
git.sr.ht/~sbinet/gg v0.5.0/go.mod h1:G2C0eRESqlKhS7ErsNey6HHrqU1PwsnCQlekFi9Q2Oo=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
|
||||
github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
|
||||
github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY=
|
||||
github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk=
|
||||
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
|
||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
|
||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
|
||||
github.com/andybalholm/stroke v0.0.0-20221221101821-bd29b49d73f0 h1:uF5Q/hWnDU1XZeT6CsrRSxHLroUSEYYO3kgES+yd+So=
|
||||
github.com/andybalholm/stroke v0.0.0-20221221101821-bd29b49d73f0/go.mod h1:ccdDYaY5+gO+cbnQdFxEXqfy0RkoV25H3jLXUDNM3wg=
|
||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ=
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
|
||||
@@ -34,8 +17,6 @@ github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY=
|
||||
github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
||||
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
||||
@@ -81,20 +62,12 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/gizak/termui/v3 v3.1.0 h1:ZZmVDgwHl7gR7elfKf1xc4IudXZ5qqfDh4wExk4Iajc=
|
||||
github.com/gizak/termui/v3 v3.1.0/go.mod h1:bXQEBkJpzxUAKf0+xq9MSWAvWZlE7c+aidmyFlkYTrY=
|
||||
github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
|
||||
github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
|
||||
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
|
||||
github.com/go-fonts/liberation v0.3.2 h1:XuwG0vGHFBPRRI8Qwbi5tIvR3cku9LUfZGq/Ar16wlQ=
|
||||
github.com/go-fonts/liberation v0.3.2/go.mod h1:N0QsDLVUQPy3UYg9XAc3Uh3UDMp2Z7M1o4+X98dXkmI=
|
||||
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
|
||||
github.com/go-latex/latex v0.0.0-20231108140139-5c1ce85aa4ea h1:DfZQkvEbdmOe+JK2TMtBM+0I9GSdzE2y/L1/AmD8xKc=
|
||||
github.com/go-latex/latex v0.0.0-20231108140139-5c1ce85aa4ea/go.mod h1:Y7Vld91/HRbTBm7JwoI7HejdDB0u+e9AUBO9MB7yuZk=
|
||||
github.com/go-pdf/fpdf v0.9.0 h1:PPvSaUuo1iMi9KkaAn90NuKi+P4gwMedWPHhj8YlJQw=
|
||||
github.com/go-pdf/fpdf v0.9.0/go.mod h1:oO8N111TkmKb9D7VvWGLvLJlaZUQVPM+6V42pp3iV4Y=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
@@ -103,13 +76,10 @@ github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJn
|
||||
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
|
||||
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
|
||||
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||
github.com/go-text/typesetting v0.0.0-20230803102845-24e03d8b5372 h1:FQivqchis6bE2/9uF70M2gmmLpe82esEm2QadL0TEJo=
|
||||
github.com/go-text/typesetting v0.0.0-20230803102845-24e03d8b5372/go.mod h1:evDBbvNR/KaVFZ2ZlDSOWWXIUKq0wCOEtzLxRM8SG3k=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||
@@ -168,12 +138,9 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
|
||||
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
@@ -181,8 +148,6 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/nlpodyssey/gopickle v0.3.0 h1:BLUE5gxFLyyNOPzlXxt6GoHEMMxD0qhsE4p0CIQyoLw=
|
||||
github.com/nlpodyssey/gopickle v0.3.0/go.mod h1:f070HJ/yR+eLi5WmM1OXJEGaTpuJEUiib19olXgYha0=
|
||||
github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d h1:x3S6kxmy49zXVVyhcnrFqxvNVCBPb2KZ9hV2RBdS840=
|
||||
github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d/go.mod h1:IuKpRQcYE1Tfu+oAQqaLisqDeXgjyyltCfsaoYN18NQ=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c h1:GwiUUjKefgvSNmv3NCvI/BL0kDebW6Xa+kcdpdc1mTY=
|
||||
@@ -256,8 +221,6 @@ golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL
|
||||
golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
|
||||
golang.org/x/exp/shiny v0.0.0-20230801115018-d63ba01acd4b h1:sgkbz1SFTsoQIvzTIw45hccUcGocu00QM3qucBYV8b0=
|
||||
golang.org/x/exp/shiny v0.0.0-20230801115018-d63ba01acd4b/go.mod h1:UH99kUObWAZkDnWqppdQe5ZhPYESUw8I0zVV1uWBR+0=
|
||||
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
|
||||
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
|
||||
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
@@ -267,8 +230,6 @@ golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+o
|
||||
golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4=
|
||||
golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
|
||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||
@@ -311,7 +272,6 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -343,7 +303,6 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
|
||||
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
|
||||
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
@@ -358,8 +317,6 @@ gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
||||
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
||||
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
|
||||
gonum.org/v1/plot v0.14.0 h1:+LBDVFYwFe4LHhdP8coW6296MBEY4nQ+Y4vuUpJopcE=
|
||||
gonum.org/v1/plot v0.14.0/go.mod h1:MLdR9424SJed+5VqC6MsouEpig9pZX2VZ57H9ko2bXU=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
@@ -403,7 +360,5 @@ gorgonia.org/vecf64 v0.9.0 h1:bgZDP5x0OzBF64PjMGC3EvTdOoMEcmfAh1VCUnZFm1A=
|
||||
gorgonia.org/vecf64 v0.9.0/go.mod h1:hp7IOWCnRiVQKON73kkC/AUMtEXyf9kGlVrtPQ9ccVA=
|
||||
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4=
|
||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -141,6 +142,32 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
b.err = b.run(ctx, requestURL, opts)
|
||||
}
|
||||
|
||||
func newBackoff(maxBackoff time.Duration) func(ctx context.Context) error {
|
||||
var n int
|
||||
return func(ctx context.Context) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
n++
|
||||
|
||||
// n^2 backoff timer is a little smoother than the
|
||||
// common choice of 2^n.
|
||||
d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
|
||||
// Randomize the delay between 0.5-1.5 x msec, in order
|
||||
// to prevent accidental "thundering herd" problems.
|
||||
d = time.Duration(float64(d) * (rand.Float64() + 0.5))
|
||||
t := time.NewTimer(d)
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-t.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
|
||||
defer blobDownloadManager.Delete(b.Digest)
|
||||
ctx, b.CancelFunc = context.WithCancel(ctx)
|
||||
@@ -153,6 +180,52 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
|
||||
_ = file.Truncate(b.Total)
|
||||
|
||||
directURL, err := func() (*url.URL, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
backoff := newBackoff(10 * time.Second)
|
||||
for {
|
||||
// shallow clone opts to be used in the closure
|
||||
// without affecting the outer opts.
|
||||
newOpts := new(registryOptions)
|
||||
*newOpts = *opts
|
||||
|
||||
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) > 10 {
|
||||
return errors.New("maxium redirects exceeded (10) for directURL")
|
||||
}
|
||||
|
||||
// if the hostname is the same, allow the redirect
|
||||
if req.URL.Hostname() == requestURL.Hostname() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// stop at the first redirect that is not
|
||||
// the same hostname as the original
|
||||
// request.
|
||||
return http.ErrUseLastResponse
|
||||
}
|
||||
|
||||
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, nil, nil, newOpts)
|
||||
if err != nil {
|
||||
slog.Warn("failed to get direct URL; backing off and retrying", "err", err)
|
||||
if err := backoff(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusTemporaryRedirect {
|
||||
return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Location()
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numDownloadParts)
|
||||
for i := range b.Parts {
|
||||
@@ -165,7 +238,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
var err error
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
w := io.NewOffsetWriter(file, part.StartsAt())
|
||||
err = b.downloadChunk(inner, requestURL, w, part, opts)
|
||||
err = b.downloadChunk(inner, directURL, w, part, opts)
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
|
||||
// return immediately if the context is canceled or the device is out of space
|
||||
|
||||
@@ -54,6 +54,8 @@ type registryOptions struct {
|
||||
Username string
|
||||
Password string
|
||||
Token string
|
||||
|
||||
CheckRedirect func(req *http.Request, via []*http.Request) error
|
||||
}
|
||||
|
||||
type Model struct {
|
||||
@@ -1131,7 +1133,9 @@ func makeRequest(ctx context.Context, method string, requestURL *url.URL, header
|
||||
req.ContentLength = contentLength
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
resp, err := (&http.Client{
|
||||
CheckRedirect: regOpts.CheckRedirect,
|
||||
}).Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -263,13 +263,27 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
|
||||
if t, err := template.Named(s); err != nil {
|
||||
slog.Debug("template detection", "error", err)
|
||||
} else {
|
||||
tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
||||
layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
||||
layers = append(layers, &layerGGML{tmpl, nil})
|
||||
layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
|
||||
layers = append(layers, &layerGGML{layer, nil})
|
||||
|
||||
if t.Parameters != nil {
|
||||
var b bytes.Buffer
|
||||
if err := json.NewEncoder(&b).Encode(t.Parameters); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerGGML{layer, nil})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -599,9 +599,10 @@ func TestCreateDetectTemplate(t *testing.T) {
|
||||
}
|
||||
|
||||
checkFileExists(t, filepath.Join(p, "blobs", "*"), []string{
|
||||
filepath.Join(p, "blobs", "sha256-0d79f567714c62c048378f2107fb332dabee0135d080c302d884317da9433cc5"),
|
||||
filepath.Join(p, "blobs", "sha256-553c4a3f747b3d22a4946875f1cc8ed011c2930d83f864a0c7265f9ec0a20413"),
|
||||
filepath.Join(p, "blobs", "sha256-c608dc615584cd20d9d830363dabf8a4783ae5d34245c3d8c115edb3bc7b28e4"),
|
||||
filepath.Join(p, "blobs", "sha256-f836ee110db21567f826332e4cedd746c06d10664fd5a9ea3659e3683a944510"),
|
||||
filepath.Join(p, "blobs", "sha256-ea34c57ba5b78b740aafe2aeb74dc6507fc3ad14170b64c26a04fb9e36c88d75"),
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -132,6 +132,8 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
if len(pending.model.ProjectorPaths) > 0 && numParallel != 1 {
|
||||
numParallel = 1
|
||||
slog.Warn("multimodal models don't support parallel requests yet")
|
||||
} else if strings.Contains(pending.model.Config.ModelFamily, "bert") {
|
||||
numParallel = runtime.NumCPU()
|
||||
}
|
||||
|
||||
for {
|
||||
|
||||
8
template/alfred.json
Normal file
8
template/alfred.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<start_system>",
|
||||
"<end_message>",
|
||||
"<start_user>",
|
||||
"<start_assistant>"
|
||||
]
|
||||
}
|
||||
6
template/alpaca.json
Normal file
6
template/alpaca.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"### Instruction:",
|
||||
"### Response"
|
||||
]
|
||||
}
|
||||
6
template/chatml.json
Normal file
6
template/chatml.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>"
|
||||
]
|
||||
}
|
||||
8
template/chatqa.json
Normal file
8
template/chatqa.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"System:",
|
||||
"User:",
|
||||
"Assistant:",
|
||||
"<|begin_of_text|>"
|
||||
]
|
||||
}
|
||||
7
template/codellama-70b-instruct.json
Normal file
7
template/codellama-70b-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"Source:",
|
||||
"Destination:",
|
||||
"<step>"
|
||||
]
|
||||
}
|
||||
6
template/falcon-instruct.json
Normal file
6
template/falcon-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"User:",
|
||||
"Assistant:"
|
||||
]
|
||||
}
|
||||
6
template/gemma-instruct.json
Normal file
6
template/gemma-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<start_of_turn>",
|
||||
"<end_of_turn>"
|
||||
]
|
||||
}
|
||||
7
template/granite-instruct.json
Normal file
7
template/granite-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"System:",
|
||||
"Question:",
|
||||
"Answer:"
|
||||
]
|
||||
}
|
||||
8
template/llama2-chat.json
Normal file
8
template/llama2-chat.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"[INST]",
|
||||
"[/INST]",
|
||||
"<<SYS>>",
|
||||
"<</SYS>>"
|
||||
]
|
||||
}
|
||||
7
template/llama3-instruct.json
Normal file
7
template/llama3-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|start_header_id|>",
|
||||
"<|end_header_id|>",
|
||||
"<|eot_id|>"
|
||||
]
|
||||
}
|
||||
6
template/magicoder.json
Normal file
6
template/magicoder.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"@@ Instruction",
|
||||
"@@ Response"
|
||||
]
|
||||
}
|
||||
6
template/mistral-instruct.json
Normal file
6
template/mistral-instruct.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>"
|
||||
]
|
||||
}
|
||||
5
template/openchat.json
Normal file
5
template/openchat.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|end_of_turn|>"
|
||||
]
|
||||
}
|
||||
8
template/phi-3.json
Normal file
8
template/phi-3.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|end|>",
|
||||
"<|system|>",
|
||||
"<|user|>",
|
||||
"<|assistant|>"
|
||||
]
|
||||
}
|
||||
7
template/solar-instruct.json
Normal file
7
template/solar-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"### System:",
|
||||
"### User:",
|
||||
"### Assistant"
|
||||
]
|
||||
}
|
||||
7
template/starcoder2-instruct.json
Normal file
7
template/starcoder2-instruct.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"stop": [
|
||||
"### Instruction",
|
||||
"### Response",
|
||||
"<|endoftext|>"
|
||||
]
|
||||
}
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
var indexBytes []byte
|
||||
|
||||
//go:embed *.gotmpl
|
||||
//go:embed *.json
|
||||
var templatesFS embed.FS
|
||||
|
||||
var templatesOnce = sync.OnceValues(func() ([]*named, error) {
|
||||
@@ -39,6 +40,15 @@ var templatesOnce = sync.OnceValues(func() ([]*named, error) {
|
||||
|
||||
// normalize line endings
|
||||
t.Bytes = bytes.ReplaceAll(bts, []byte("\r\n"), []byte("\n"))
|
||||
|
||||
params, err := templatesFS.ReadFile(t.Name + ".json")
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(params, &t.Parameters); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return templates, nil
|
||||
@@ -48,6 +58,10 @@ type named struct {
|
||||
Name string `json:"name"`
|
||||
Template string `json:"template"`
|
||||
Bytes []byte
|
||||
|
||||
Parameters *struct {
|
||||
Stop []string `json:"stop"`
|
||||
}
|
||||
}
|
||||
|
||||
func (t named) Reader() io.Reader {
|
||||
|
||||
6
template/vicuna.json
Normal file
6
template/vicuna.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"stop": [
|
||||
"USER:",
|
||||
"ASSISTANT:"
|
||||
]
|
||||
}
|
||||
8
template/zephyr.json
Normal file
8
template/zephyr.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"stop": [
|
||||
"<|system|>",
|
||||
"</s>",
|
||||
"<|user|>",
|
||||
"<|assistant|>"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user