Compare commits

..

2 Commits

Author SHA1 Message Date
Roy Han
781585d9bd return 204 for cross-origin OPTIONS 2024-08-12 11:41:36 -07:00
Roy Han
b84a54be05 return 405 for bad method 2024-08-12 11:41:36 -07:00
37 changed files with 301 additions and 1255 deletions

View File

@@ -31,7 +31,7 @@ jobs:
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: Build Darwin
env:
@@ -87,7 +87,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get ./...
- run: |
@@ -141,7 +141,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install ROCm'
run: |
@@ -218,7 +218,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install CUDA'
run: |
@@ -306,7 +306,7 @@ jobs:
write-host "plugin installed"
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get
- uses: actions/download-artifact@v4

View File

@@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: go get ./...
- run: |
@@ -163,7 +163,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install ROCm'
run: |
@@ -200,7 +200,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- name: 'Install CUDA'
run: |
@@ -255,7 +255,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: false
- run: |
case ${{ matrix.arch }} in
@@ -297,7 +297,7 @@ jobs:
submodules: recursive
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
go-version: "stable"
cache: true
- run: |
case ${{ matrix.arch }} in

View File

@@ -24,6 +24,7 @@ linters:
- nosprintfhostport
- staticcheck
- tenv
- testifylint
- unconvert
- unused
- usestdlibvars

View File

@@ -1,37 +0,0 @@
# Contributing to Ollama
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
## Set up
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
## Pull requests
### Ideal issues
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
### Issues that are harder to review
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or correct missing documentation are helpful; however, large documentation additions can be hard to maintain over time.
### Issues that may not be accepted
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
* Changes that add significant friction to the user experience
* Changes that create a large future maintenance burden for maintainers and contributors
### Best practices
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`). In the description, leave 2-3 short sentences that explain more about the change and its impact.
* Tests: please add test coverage to changes where possible.
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
## Need help?
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).

View File

@@ -298,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil
}
// ListRunning lists running models.
// List running models.
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
var lr ProcessResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@@ -333,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil
}
// Heartbeat checks if the server has started and is responsive; if yes, it
// Hearbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error {
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {

View File

@@ -11,12 +11,12 @@ import (
)
const (
updateAvailableMenuID = 1
updateMenuID = updateAvailableMenuID + 1
separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1
updatAvailableMenuID = 1
updateMenuID = updatAvailableMenuID + 1
separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1
)
func (t *winTray) initMenus() error {
@@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
func (t *winTray) UpdateAvailable(ver string) error {
if !t.updateNotified {
slog.Debug("updating menu and sending notification for new update")
if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
return fmt.Errorf("unable to create menu entries %w", err)
}
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {

View File

@@ -11,7 +11,6 @@ import (
"path/filepath"
"sort"
"sync"
"syscall"
"unsafe"
"golang.org/x/sys/windows"
@@ -434,12 +433,7 @@ func (t *winTray) setIcon(src string) error {
t.muNID.Lock()
defer t.muNID.Unlock()
t.nid.Icon = h
t.nid.Flags |= NIF_ICON | NIF_TIP
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
copy(t.nid.Tip[:], toolTipUTF16)
} else {
return err
}
t.nid.Flags |= NIF_ICON
t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
return t.nid.modify()

View File

@@ -61,7 +61,6 @@ const (
MIIM_SUBMENU = 0x00000004
MIM_APPLYTOSUBMENUS = 0x80000000
NIF_ICON = 0x00000002
NIF_TIP = 0x00000004
NIF_INFO = 0x00000010
NIF_MESSAGE = 0x00000001
SW_HIDE = 0

View File

@@ -22,7 +22,6 @@ import (
"runtime"
"slices"
"strings"
"sync/atomic"
"syscall"
"time"
@@ -79,7 +78,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
defer p.Stop()
for i := range modelfile.Commands {
switch modelfile.Commands[i].Name {
@@ -114,7 +112,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = tempfile
}
digest, err := createBlob(cmd, client, path, spinner)
digest, err := createBlob(cmd, client, path)
if err != nil {
return err
}
@@ -265,20 +263,13 @@ func tempZipFiles(path string) (string, error) {
return tempfile.Name(), nil
}
func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
bin, err := os.Open(path)
if err != nil {
return "", err
}
defer bin.Close()
// Get file info to retrieve the size
fileInfo, err := bin.Stat()
if err != nil {
return "", err
}
fileSize := fileInfo.Size()
hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil {
return "", err
@@ -288,43 +279,13 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
return "", err
}
var pw progressWriter
status := "transferring model data 0%"
spinner.SetMessage(status)
done := make(chan struct{})
defer close(done)
go func() {
ticker := time.NewTicker(60 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ticker.C:
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
case <-done:
spinner.SetMessage("transferring model data 100%")
return
}
}
}()
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return "", err
}
return digest, nil
}
// progressWriter is an io.Writer that discards the data it is given and only
// keeps a running total of how many bytes have passed through it. The total
// is held in an atomic so it can be read while writes are in flight.
type progressWriter struct {
	n atomic.Int64
}

// Write adds len(p) to the running total and reports the whole slice as
// written. It never returns an error.
func (w *progressWriter) Write(p []byte) (n int, err error) {
	n = len(p)
	w.n.Add(int64(n))
	return n, nil
}
func RunHandler(cmd *cobra.Command, args []string) error {
interactive := true
@@ -1125,7 +1086,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
return nil
}
func RunServer(_ *cobra.Command, _ []string) error {
func RunServer(cmd *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil {
return err
}

View File

@@ -27,10 +27,6 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
"tokenizer.ggml.token_type": t.Vocabulary.Types,
}
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
if t.Template != "" {
kv["tokenizer.chat_template"] = t.Template
}
@@ -93,8 +89,6 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
conv = &mixtral{}
case "GemmaForCausalLM":
conv = &gemma{}
case "Phi3ForCausalLM":
conv = &phi3{}
default:
return errors.New("unsupported architecture")
}

View File

@@ -90,6 +90,10 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
kv["llama.attention.value_length"] = p.HeadDim
}
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
return kv
}

View File

@@ -1,125 +0,0 @@
package convert
import (
"cmp"
"encoding/binary"
"io"
"math"
"strings"
"sync"
"github.com/ollama/ollama/llm"
)
// phi3 collects the JSON-tagged model settings used to convert a Phi-3 model.
// Several settings are declared twice under different JSON keys (for example
// num_hidden_layers and n_layers); KV resolves each such pair with cmp.Or,
// taking the first non-zero value.
type phi3 struct {
Parameters
// Layer count; KV emits the first non-zero of the two as phi3.block_count.
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayers uint32 `json:"n_layers"`
// Embedding width; KV emits the first non-zero of the two as
// phi3.embedding_length.
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
// Emitted as phi3.feed_forward_length.
IntermediateSize uint32 `json:"intermediate_size"`
// Attention head count; first non-zero is emitted as
// phi3.attention.head_count.
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
// Key/value head count; first non-zero is emitted as
// phi3.attention.head_count_kv.
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
NHeadKV uint32 `json:"n_head_kv"`
// Emitted as phi3.rope.freq_base.
RopeTheta float32 `json:"rope_theta"`
// RopeScaling.Type selects which attn_factor formula KV applies ("su",
// "longrope", "yarn", or empty for none). LongFactor and ShortFactor are
// written out by Tensors as the rope_factors_long/short tensors.
RopeScaling struct {
Type string `json:"type"`
LongFactor ropeFactor `json:"long_factor"`
ShortFactor ropeFactor `json:"short_factor"`
} `json:"rope_scaling"`
// Emitted as phi3.attention.layer_norm_rms_epsilon.
RMSNormEPS float32 `json:"rms_norm_eps"`
// NOTE(review): NPositions is not read by any method visible in this file;
// presumably an alternate key for MaxPositionEmbeddings — confirm.
NPositions uint32 `json:"n_positions"`
// Emitted as phi3.context_length; also the numerator of the rope scale.
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
// Emitted as phi3.rope.scaling.original_context_length; also the
// denominator of the rope scale.
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
// Emitted as phi3.attention.sliding_window.
SlidingWindow uint32 `json:"sliding_window"`
}
// Compile-time check that *phi3 satisfies the Converter interface.
var _ Converter = (*phi3)(nil)
// KV returns the key/value metadata for a Phi-3 model. It starts from the
// entries produced by the embedded Parameters.KV(t) and adds the phi3.*
// entries derived from the receiver's fields.
//
// Settings that are declared under two JSON keys are resolved with cmp.Or
// (first non-zero value wins). The rope dimension count is derived from the
// same resolved embedding length and head count that are emitted as
// phi3.embedding_length and phi3.attention.head_count, so the three values
// always agree. When no head count is set at all the rope dimension count is
// left out rather than dividing by zero.
//
// KV panics if RopeScaling.Type is not one of "", "su", "longrope" or "yarn".
func (p *phi3) KV(t *Tokenizer) llm.KV {
	// Resolve the dual-key settings once so every derived entry uses the
	// same value.
	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)

	kv := p.Parameters.KV(t)
	kv["general.architecture"] = "phi3"
	kv["general.name"] = "phi3"
	kv["phi3.context_length"] = p.MaxPositionEmbeddings
	kv["phi3.embedding_length"] = embeddingLength
	kv["phi3.feed_forward_length"] = p.IntermediateSize
	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
	kv["phi3.attention.head_count"] = headCount
	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
	if headCount > 0 {
		// Per-head width. Guarded because uint32 division by zero panics.
		kv["phi3.rope.dimension_count"] = embeddingLength / headCount
	}
	kv["phi3.rope.freq_base"] = p.RopeTheta
	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
	kv["phi3.attention.sliding_window"] = p.SlidingWindow

	// Ratio of the extended context length to the original one; only used by
	// the scaling formulas below.
	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)

	switch p.RopeScaling.Type {
	case "":
		// no scaling
	case "su", "longrope":
		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
	case "yarn":
		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
	default:
		panic("unknown rope scaling type")
	}

	return kv
}
// Tensors converts the input tensors into llm.Tensor values, renaming each
// one with tensorName and keeping its kind, shape and data.
//
// Immediately before the first tensor whose converted name starts with
// "blk.0.", two extra tensors are inserted: rope_factors_long.weight and
// rope_factors_short.weight, backed by RopeScaling.LongFactor and
// RopeScaling.ShortFactor. They are inserted at most once, and not at all if
// no input tensor maps to a "blk.0." name.
func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
	// Room for every input tensor plus the two rope factor tensors.
	result := make([]llm.Tensor, 0, len(ts)+2)

	var ropeFactorsOnce sync.Once
	emitRopeFactors := func() {
		long := p.RopeScaling.LongFactor
		short := p.RopeScaling.ShortFactor
		result = append(result,
			llm.Tensor{
				Name:     "rope_factors_long.weight",
				Kind:     0,
				Shape:    []uint64{uint64(len(long))},
				WriterTo: long,
			},
			llm.Tensor{
				Name:     "rope_factors_short.weight",
				Kind:     0,
				Shape:    []uint64{uint64(len(short))},
				WriterTo: short,
			},
		)
	}

	for _, src := range ts {
		converted := p.tensorName(src.Name())
		if strings.HasPrefix(converted, "blk.0.") {
			ropeFactorsOnce.Do(emitRopeFactors)
		}

		result = append(result, llm.Tensor{
			Name:     converted,
			Kind:     src.Kind(),
			Shape:    src.Shape(),
			WriterTo: src,
		})
	}

	return result
}
// tensorName rewrites a source tensor name into its converted form by
// substituting each known source fragment with its replacement (for example
// "model.layers" becomes "blk"). Text that matches no fragment is returned
// unchanged.
func (p *phi3) tensorName(n string) string {
	// Flattened old/new pairs, in the order strings.NewReplacer expects.
	substitutions := []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.qkv_proj", "attn_qkv",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_up_proj", "ffn_up",
		"post_attention_layernorm", "ffn_norm",
	}
	replacer := strings.NewReplacer(substitutions...)
	return replacer.Replace(n)
}
// ropeFactor is a list of float32 factors that can write itself out in
// little-endian binary form.
type ropeFactor []float32

// WriteTo writes every element of r to w as a little-endian float32 and
// implements io.WriterTo. On success it returns the number of bytes written
// (four per element). If the underlying write fails it returns 0 and the
// error, because binary.Write does not report partial progress.
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	if err := binary.Write(w, binary.LittleEndian, r); err != nil {
		return 0, err
	}
	// Each float32 is encoded as exactly 4 bytes.
	return int64(len(r)) * 4, nil
}

View File

@@ -65,8 +65,6 @@ func TestConvertFull(t *testing.T) {
"Mistral-7B-Instruct-v0.2",
"Mixtral-8x7B-Instruct-v0.1",
"gemma-2b-it",
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
"Phi-3-mini-128k-instruct",
}
for i := range cases {

View File

@@ -1,225 +0,0 @@
{
"general.architecture": "phi3",
"general.file_type": "1",
"general.quantization_version": "2",
"phi3.block_count": "32",
"phi3.context_length": "131072",
"phi3.embedding_length": "3072",
"phi3.feed_forward_length": "8192",
"phi3.rope.scaling.original_context_length": "4096",
"phi3.rope.dimension_count": "96",
"phi3.rope.freq_base": "10000",
"phi3.rope.scaling.attn_factor": "1.1902381",
"phi3.attention.head_count": "32",
"phi3.attention.head_count_kv": "32",
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
"phi3.attention.sliding_window": "262144",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.pre": "default",
"tokenizer.ggml.add_bos_token": "false",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "1",
"tokenizer.ggml.eos_token_id": "32000",
"tokenizer.ggml.unknown_token_id": "0",
"tokenizer.ggml.padding_token_id": "32000",
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
}

View File

@@ -16,9 +16,7 @@ If the model being imported is one of these architectures, it can be imported di
- LlamaForCausalLM
- MistralForCausalLM
- MixtralForCausalLM
- GemmaForCausalLM
- Phi3ForCausalLM
```dockerfile
FROM /path/to/safetensors/directory

View File

@@ -182,6 +182,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] Reproducible outputs
- [x] Vision
- [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Logprobs
#### Supported request fields

View File

@@ -112,9 +112,15 @@ Keep the following tips and best practices in mind when working with Go template
ChatML is a popular template format. It can be used for models such as Databricks' DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ else }}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
```
### Example Tools

2
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/ollama/ollama
go 1.22.5
go 1.22.0
require (
github.com/containerd/console v1.0.3

View File

@@ -1040,7 +1040,6 @@ struct llama_server_context
img.request_encode_image = false;
}
LOG_TEE("slot has images: %d\n", slot.images.size());
return slot.images.size() > 0;
}
@@ -1272,150 +1271,6 @@ struct llama_server_context
}
}
/* bool process_images_paligemma(server_slot &slot, int n_batch)
{
// set_off_embeds(ctx);
int n_past = 0;
int image_idx = 0;
slot_image &img = slot.images[image_idx];
// rescale image embeddings
float *data = img.image_embedding;
for (int i = 0; i < 2048 * 256; i++)
{
data[i] = data[i] / sqrt(2048);
}
if (ctx)
{
// set_image_embeds(ctx, data);
// print_embeds(ctx);
}
else
{
printf("ctx is null");
}
// generate user_prompt -> this should contain image tokens prepended and a new line appended:
// batch.n_tokens += (int)slot.images.size() * llama_n_embd(model);
std::vector<llama_token> tokens;
std::string prompt = "caption es";
std::vector<llama_token> text = ::llama_tokenize(ctx, prompt, false, true);
for (int i = 0; i < (int)slot.images.size() * 256; i++)
{
tokens.push_back(257152);
}
tokens.push_back(2);
for (int i = 0; i < text.size(); i++)
{
// printf("token [%d]: %d\n", text[i]);
tokens.push_back(text[i]);
}
tokens.push_back(108);
batch.n_tokens = (int)slot.images.size() * 256 + 2 + text.size();
printf("\nbatch.n_tokens %d\n", batch.n_tokens);
for (int i = 0; i < batch.n_tokens; i++)
{
printf("token %d: %d\n", i, tokens[i]);
}
for (int i = 0; i < batch.n_tokens; i += n_batch)
{
printf("calling decode\n");
int n_eval = (int)batch.n_tokens - i;
if (n_eval > n_batch)
{
n_eval = n_batch;
}
printf("n_eval: %d, n_past: %d, slot.n_past: %d\n", n_eval, n_past, slot.n_past);
llama_set_causal_attn(ctx, false);
printf("DEBUGGING DECODE BATCH:\n");
for (int j = 0; j < n_eval; j++)
{
printf("token[%d]: %d\n", j, tokens[j]);
}
llama_batch my_batch = llama_batch_get_one(&tokens[i], n_eval, 0, 0);
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_eval, batch.token + i, batch.pos + i, batch.logits + i);
for (int j = 0; j < n_eval; j++)
{
// printf("new batch view token [%d]: %d\n", j, (batch.token[i + j]));
}
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_eval, my_batch.token + i, my_batch.pos + i, my_batch.logits + i);
for (int j = 0; j < n_eval; j++)
{
// printf("new batch view token [%d]: %d\n", j, (my_batch.token[i + j]));
}
printf("n_eval: %d, llama_pos: %d, llama_seq_id: %d\n", n_eval, 0, 0);
if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval, 0, 0)))
{
printf("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, batch.n_tokens, n_batch, n_past);
return false;
}
llama_set_causal_attn(ctx, true);
slot.n_past += n_eval;
}
printf("done processing images paligemma\n");
// llama_batch_clear(batch);
return true;
} */
// prepare_pali rescales the first image embedding of `slot`, hands it to the
// llama context, and appends a fixed token sequence for that slot to the
// shared `batch`.
//
// NOTE(review): this looks like experimental PaliGemma bring-up code (the
// prompt text and the token ids are hard-coded) -- confirm before reuse.
//
// Parameters:
//   slot    - slot whose images are used; slot.n_past is advanced by one for
//             every token added to the batch.
//   n_batch - not used by this function.
//
// Returns true unconditionally.
bool prepare_pali(server_slot &slot, int n_batch)
{
// set_off_embeds(ctx);
// n_past is declared but never read below.
int n_past = 0;
// Only the first image of the slot is rescaled, although the token loop
// further down reserves 256 positions for every image in slot.images.
int image_idx = 0;
slot_image &img = slot.images[image_idx];
// rescale image embeddings
// The embedding buffer is modified in place; it is assumed to hold at least
// 2048 * 256 floats -- TODO confirm against the code that allocates it.
float *data = img.image_embedding;
for (int i = 0; i < 2048 * 256; i++)
{
data[i] = data[i] / sqrt(2048);
}
// Passes the rescaled buffer to the context via set_image_embeds.
set_image_embeds(ctx, data);
// generate user_prompt -> this should contain image tokens prepended and a new line appended:
// batch.n_tokens += (int)slot.images.size() * llama_n_embd(model);
std::vector<llama_token> tokens;
// The prompt text is hard-coded; nothing from the request is used here.
std::string prompt = "How much ketchup is in this image?";
std::vector<llama_token> text = ::llama_tokenize(ctx, prompt, false, true);
// 256 copies of token id 257152 per image -- presumably the image
// placeholder token; verify against the model vocabulary.
for (int i = 0; i < (int)slot.images.size() * 256; i++)
{
tokens.push_back(257152);
}
// Token id 2 is presumably BOS -- TODO confirm.
tokens.push_back(2);
for (int i = 0; i < text.size(); i++)
{
// printf("token [%d]: %d\n", text[i]);
tokens.push_back(text[i]);
}
// Token id 108 is presumably the newline token mentioned in the comment
// above -- TODO confirm.
tokens.push_back(108);
printf("currently, system_tokens.size %d\n", system_tokens.size());
// Append every token to the shared batch at position
// system_tokens.size() + slot.n_past, tagged with this slot's id, and
// advance slot.n_past as we go. The trailing `true` is passed for every
// token (presumably the per-token logits flag -- verify).
for (int i = 0; i < (int)tokens.size(); ++i)
{
llama_batch_add(batch, tokens[i], system_tokens.size() + slot.n_past, {slot.id}, true);
slot.n_past += 1;
}
// llama_set_causal_attn(ctx, false);
printf("slot.n_past == %d\n", slot.n_past);
return true;
}
// for multiple images processing
bool ingest_images(server_slot &slot, int n_batch)
{
@@ -1696,15 +1551,6 @@ struct llama_server_context
}
bool update_slots() {
/* gpt_params params;
params.model = "/Users/joshyan/Projects/PaliGemma/paligemma-3b-pt-224-text-model-f16.gguf";
llama_model_params model_params = llama_model_params_from_gpt_params(params);
llama_model *model = llama_load_model_from_file(params.model.c_str(), model_params);
llama_context_params ctx_params = llama_context_params_from_gpt_params(params);
llama_context *ctx_llama = llama_new_context_with_model(model, ctx_params);
ctx = ctx_llama; */
if (system_need_update)
{
LOG_DEBUG("updating system prompt", {});
@@ -1965,15 +1811,9 @@ struct llama_server_context
const bool has_images = process_images(slot);
// process the prefix of first image
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, false) : prompt_tokens;
printf("\nprinting prefix tokens\n");
for (int i = 0; i < prefix_tokens.size(); i++)
{
printf("prefix token[%d]: %d\n", i, prefix_tokens[i]);
}
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
printf("slot_npast = %d\n", slot_npast);
int32_t ga_i = slot.ga_i;
int32_t ga_n = slot.ga_n;
@@ -1993,25 +1833,18 @@ struct llama_server_context
slot_npast++;
}
LOG_ERROR("checking has images", {
{"has images", has_images},
{"task_id", slot.task_id},
});
// if (has_images && !ingest_images(slot, n_batch))
if (has_images && !prepare_pali(slot, n_batch))
if (has_images && !ingest_images(slot, n_batch))
{
LOG_ERROR("failed processing images", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
});
{"slot_id", slot.id},
{"task_id", slot.task_id},
});
// FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value
return false;
}
print_causal(ctx);
printf("batch.n_tokens here for setting logits: %d\n", batch.n_tokens);
// extract the logits only for the last token
if (batch.n_tokens > 0)
{
@@ -2026,58 +1859,18 @@ struct llama_server_context
if (batch.n_tokens == 0)
{
/* completion_token_output result;
const llama_token id = llama_sampling_sample(slots[0].ctx_sampling, ctx, NULL, slots[0].i_batch);
llama_sampling_accept(slots[0].ctx_sampling, ctx, id, true);
slots[0].n_decoded += 1;
if (slots[0].n_decoded == 1)
{
slots[0].t_start_genereration = ggml_time_us();
slots[0].t_prompt_processing = (slots[0].t_start_genereration - slots[0].t_start_process_prompt) / 1e3;
metrics.on_prompt_eval(slots[0]);
}
llama_token_data_array cur_p = {slots[0].ctx_sampling->cur.data(), slots[0].ctx_sampling->cur.size(), false};
result.tok = id;
const int32_t n_probs = slots[0].sparams.n_probs;
if (slots[0].sparams.temp <= 0 && n_probs > 0)
{
// for llama_sample_token_greedy we need to sort candidates
llama_sample_softmax(ctx, &cur_p);
}
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
{
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
}
if (!process_token(result, slots[0]))
{
slots[0].release();
slots[0].print_timings();
send_final_response(slots[0]);
metrics.on_prediction(slots[0]);
}
slots[0].i_batch = -1; */
all_slots_are_idle = true;
return true;
}
printf("batch.n_tokens = %d\n", batch.n_tokens);
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
printf("i = %d\n", i);
const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);
for (auto & slot : slots)
{
if (slot.ga_n != 1)
{
printf("slot.ga_n = %d\n", slot.ga_n);
// context extension via Self-Extend
while (slot.n_past_se >= slot.ga_i + slot.ga_w)
{
@@ -2104,30 +1897,20 @@ struct llama_server_context
}
}
printf("batching\n");
llama_batch batch_view =
{
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
// llama_batch batch_view = prepare_pali(slots[0], n_batch);
printf("%s: viewing batch: n_tokens = %d, batch.token %d, batch.pos = %d, batch.logits = %d\n", __func__, n_tokens, batch.token + i, batch.pos + i, batch.logits + i);
for (int j = 0; j < n_tokens; j++)
{
printf("new batch view token [%d]: %d\n", j, (batch.token[i + j]));
}
printf("current state of causal attn: ");
print_causal(ctx);
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
const int ret = llama_decode(ctx, batch_view);
llama_set_causal_attn(ctx, true);
print_causal(ctx);
if (ret != 0)
{
if (n_batch == 1 || ret < 0)
@@ -2147,7 +1930,6 @@ struct llama_server_context
for (auto & slot : slots)
{
printf("there are currently n slots\n");
if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens))
{
continue;
@@ -2156,7 +1938,6 @@ struct llama_server_context
// prompt evaluated for embedding
if (slot.embedding)
{
printf("slot.embedding is true\n");
send_embedding(slot, batch_view);
slot.release();
slot.i_batch = -1;
@@ -2164,10 +1945,8 @@ struct llama_server_context
}
completion_token_output result;
printf("sampling for the ith token: %d\n", slot.i_batch - i);
// batch.logits[263] = true;
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
printf("got back this token: %d\n", id);
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
slot.n_decoded += 1;

View File

@@ -9,8 +9,8 @@ set -o pipefail
echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
#git_module_setup
#apply_patches
git_module_setup
apply_patches
sign() {
if [ -n "$APPLE_IDENTITY" ]; then
@@ -97,5 +97,5 @@ case "${GOARCH}" in
;;
esac
#cleanup
cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@@ -157,14 +157,6 @@ type Tensor struct {
io.WriterTo `json:"-"`
}
// block extracts the block index from a tensor name of the form
// "blk.<index>.<rest>". It returns -1 when the name cannot be scanned
// against that pattern.
func (t Tensor) block() (n int) {
	_, err := fmt.Sscanf(t.Name, "blk.%d.", &n)
	if err != nil {
		return -1
	}

	return n
}
func (t Tensor) blockSize() uint64 {
switch t.Kind {
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16

View File

@@ -532,14 +532,15 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
}
}
slices.SortStableFunc(ts, func(a, b Tensor) int {
if i, j := a.block(), b.block(); i < 0 && j > 0 {
return 1
} else if i > 0 && j < 0 {
return -1
} else {
return cmp.Compare(i, j)
slices.SortFunc(ts, func(a, b Tensor) int {
var i, j int
if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
return cmp.Compare(a.Name, b.Name)
} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
return cmp.Compare(a.Name, b.Name)
}
return cmp.Compare(i, j)
})
var s uint64

View File

@@ -1,311 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 54aa822c..45d03982 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -765,9 +765,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
- embeddings = ggml_gelu(ctx0, embeddings);
- embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
- embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
+ // paligemma missing second linear layer
+ if (model.mm_2_w) {
+ embeddings = ggml_gelu(ctx0, embeddings);
+ embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
+ embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
+ }
} else if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
@@ -2542,7 +2545,10 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
return ctx->vision_model.mm_model_peg_0_b->ne[0];
}
if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
- return ctx->vision_model.mm_2_b->ne[0];
+ // paligemma missing second linear layer
+ if (ctx->vision_model.mm_2_b == nullptr) {
+ return ctx->vision_model.mm_0_b->ne[0];
+ }
}
if (ctx->proj_type == PROJECTOR_TYPE_MLP_NORM) {
return ctx->vision_model.mm_3_b->ne[0];
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 8c7dd2ae..38eeb305 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -18,7 +18,10 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
if (n_eval > n_batch) {
n_eval = n_batch;
}
- if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
+
+ llama_batch my_batch = llama_batch_get_one(&tokens[i], n_eval, *n_past, 0);
+ if (llama_decode(ctx_llama, my_batch))
+ {
LOG_TEE("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
return false;
}
@@ -36,6 +39,11 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
std::string str2 = str;
std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
+ embd_inp.push_back(108);
+ for (int i = 0; i < embd_inp.size(); i++)
+ {
+ printf("token[%d]: %d\n", i, embd_inp[i]);
+ }
eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
return true;
}
@@ -183,9 +191,17 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
}
}
- eval_string(ctx_llava->ctx_llama, system_prompt.c_str(), params->n_batch, &n_past, true);
- llava_eval_image_embed(ctx_llava->ctx_llama, image_embed, params->n_batch, &n_past);
- eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
+ // build user prompt with 256 image tokens
+ user_prompt = "What is in this image?";
+ std::string image_token_prefix = "";
+ for (int i = 0; i < 256; i++) {
+ image_token_prefix += "<image>";
+ }
+ std::string user_prompt_with_images = image_token_prefix + "<bos>" + user_prompt;
+
+ llama_set_causal_attn(ctx_llava->ctx_llama, true);
+ eval_string(ctx_llava->ctx_llama, user_prompt_with_images.c_str(), params->n_batch, &n_past, false);
+ // llama_set_causal_attn(ctx_llava->ctx_llama, true);
// generate the response
@@ -324,6 +340,19 @@ int main(int argc, char ** argv) {
return 1;
}
+ if (!image_embed || !image_embed->embed) {
+ std::cerr << "Error: image_embed or image_embed->embed is null." << std::endl;
+ return 1;
+ }
+
+ // image feature scaling
+ float *data = image_embed->embed;
+ for (int i = 0; i < 2048 * 256; i++) {
+ data[i] = data[i] / sqrt(2048);
+ }
+
+ set_image_embeds(ctx_llava->ctx_llama, image_embed->embed);
+
// process the prompt
process_prompt(ctx_llava, image_embed, &params, params.prompt);
diff --git a/include/llama.h b/include/llama.h
index ce07f4fa..c3465d68 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -444,6 +444,13 @@ extern "C" {
// Frees all allocated memory
LLAMA_API void llama_free(struct llama_context * ctx);
+ // save image embeddings
+ LLAMA_API void set_image_embeds(struct llama_context *ctx, float *data);
+
+ LLAMA_API void print_embeds(struct llama_context *ctx);
+
+ LLAMA_API void print_causal(struct llama_context *ctx);
+
LLAMA_API int64_t llama_time_us(void);
LLAMA_API size_t llama_max_devices(void);
diff --git a/src/llama.cpp b/src/llama.cpp
index 7f2f0003..d5926202 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2677,6 +2677,7 @@ struct llama_context {
const struct llama_model & model;
+ float *image_embeds;
struct llama_cparams cparams;
struct llama_sampling sampling;
struct llama_kv_cache kv_self;
@@ -2760,6 +2761,33 @@ struct llama_context {
struct ggml_tensor * inp_KQ_mask_cross; // F32 [n_outputs_enc, n_batch]
};
+void set_image_embeds(llama_context *ctx, float *data) {
+ ctx->image_embeds = data;
+}
+
+void print_embeds(struct llama_context *ctx)
+{
+ if (ctx->image_embeds)
+ {
+ for (int i = 0; i < 256; i++)
+ {
+ LLAMA_LOG_INFO("%f ", ctx->image_embeds[i]);
+ }
+ }
+}
+
+void print_causal(llama_context *ctx)
+{
+ if (ctx->cparams.causal_attn)
+ {
+ LLAMA_LOG_INFO("causal attn is true\n");
+ }
+ else
+ {
+ LLAMA_LOG_INFO("causal attn is false\n");
+ }
+}
+
struct llama_lora_weight {
struct ggml_tensor * a = nullptr;
struct ggml_tensor * b = nullptr;
@@ -3021,6 +3049,96 @@ static bool llama_kv_cache_init(
return true;
}
+void llama_log_tensor(ggml_tensor *tensor, char *filename)
+{
+ if (tensor == NULL)
+ {
+ fprintf(stderr, "Tensor is NULL\n");
+ return;
+ }
+
+ FILE *fp = fopen(filename, "wb");
+ if (fp == NULL)
+ {
+ fprintf(stderr, "Failed to open file '%s'\n", filename);
+ return;
+ }
+
+ LLAMA_LOG_INFO("Tensor name: %s\n", tensor->name);
+ LLAMA_LOG_INFO("Tensor type: ");
+ switch (tensor->type)
+ {
+ case GGML_TYPE_F32:
+ LLAMA_LOG_INFO("GGML_TYPE_F32\n");
+ break;
+ case GGML_TYPE_F16:
+ printf("GGML_TYPE_F16\n");
+ break;
+ case GGML_TYPE_Q4_0:
+ printf("GGML_TYPE_Q4_0\n");
+ break;
+ case GGML_TYPE_Q4_1:
+ printf("GGML_TYPE_Q4_1\n");
+ break;
+ default:
+ printf("Unknown\n");
+ }
+
+ LLAMA_LOG_INFO("Tensor dimensions: ");
+ for (int i = 0; i < GGML_MAX_DIMS; i++)
+ {
+ if (tensor->ne[i] == 1)
+ break;
+ printf("%ld ", tensor->ne[i]);
+ }
+ printf("\n");
+
+ size_t num_elements = ggml_nelements(tensor);
+ LLAMA_LOG_INFO("num elements: %zu\n", num_elements);
+
+ LLAMA_LOG_INFO("Tensor data:\n");
+ switch (tensor->type)
+ {
+ case GGML_TYPE_F32:
+ {
+ float *data = (float *)tensor->data;
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ fprintf(fp, "%f ", data[i]);
+ if (i % 2048 == 0 && i != 0)
+ {
+ fprintf(fp, "\n");
+ }
+ }
+ /* for (size_t i = 0; i < 25; i++)
+ {
+ LLAMA_LOG_INFO("%f ", data[i]);
+ if (i % 2048 == 0 && i != 0)
+ {
+ LLAMA_LOG_INFO("\n");
+ }
+ } */
+ }
+ break;
+ case GGML_TYPE_F16:
+ {
+ // Implement custom printing for fp16 data
+ fprintf(fp, "F16 data (not shown)\n");
+ }
+ break;
+ // For quantized types, you might need to implement custom printing logic
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1:
+ fprintf(fp, "Quantized data (not shown)\n");
+ break;
+ default:
+ fprintf(fp, "Unknown data type\n");
+ }
+ fprintf(fp, "\n");
+
+ fclose(fp);
+}
+
// find an empty slot of size "n_tokens" in the cache
// updates the cache head
// Note: On success, it's important that cache.head points
@@ -11660,6 +11778,18 @@ struct llm_build_context {
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
+ // set the image embeddings in the input tensor
+ if (lctx.image_embeds) {
+ struct ggml_tensor *image_embeds = ggml_dup_tensor(ctx0, inpL);
+ image_embeds->data = lctx.image_embeds;
+ image_embeds->ne[1] = 256;
+ print_embeds(&lctx);
+ // llama_log_tensor(image_embeds, "/Users/joshyan/ollama/tensordata");
+
+ inpL = ggml_set_2d_inplace(ctx0, inpL, image_embeds, inpL->nb[1], 0);
+ lctx.image_embeds = NULL;
+ }
+
inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
cb(inpL, "inp_scaled", -1);
@@ -14678,7 +14808,7 @@ static int llama_decode_internal(
}
// non-causal masks do not use the KV cache
- if (hparams.causal_attn) {
+ if (hparams.causal_attn || lctx.image_embeds) {
llama_kv_cache_update(&lctx);
// if we have enough unused cells before the current head ->
@@ -18565,6 +18695,12 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
if (ctx->logits == nullptr) {
throw std::runtime_error("no logits");
}
+ // LLAMA_LOG_INFO("CURRENTLY, I IS %d\n", i);
+ // printf("currently, i is: %d", i);
+ /* for (int i = 0; i < 263; i++)
+ {
+ printf("output_ids[%d]: %d\n", i, ctx->output_ids[i]);
+ } */
if (i < 0) {
j = ctx->n_outputs + i;
@@ -18577,6 +18713,7 @@ float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
j = ctx->output_ids[i];
}
+ j = 0;
if (j < 0) {
throw std::runtime_error(format("batch.logits[%d] != true", i));
}

View File

@@ -179,7 +179,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
}
}
opts.NumGPU = 0
if len(servers) == 0 {
return nil, fmt.Errorf("no servers found for %v", gpus)
}
@@ -733,7 +733,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
"n_predict": req.Options.NumPredict,
"n_keep": req.Options.NumKeep,
"main_gpu": req.Options.MainGPU,
"temperature": 0,
"temperature": req.Options.Temperature,
"top_k": req.Options.TopK,
"top_p": req.Options.TopP,
"min_p": req.Options.MinP,

View File

@@ -3,12 +3,11 @@ package progress
import (
"fmt"
"strings"
"sync/atomic"
"time"
)
type Spinner struct {
message atomic.Value
message string
messageWidth int
parts []string
@@ -22,25 +21,20 @@ type Spinner struct {
func NewSpinner(message string) *Spinner {
s := &Spinner{
message: message,
parts: []string{
"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
},
started: time.Now(),
}
s.SetMessage(message)
go s.start()
return s
}
func (s *Spinner) SetMessage(message string) {
s.message.Store(message)
}
func (s *Spinner) String() string {
var sb strings.Builder
if message, ok := s.message.Load().(string); ok && len(message) > 0 {
message := strings.TrimSpace(message)
if len(s.message) > 0 {
message := strings.TrimSpace(s.message)
if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth]
}

View File

@@ -62,7 +62,7 @@ func (b *Buffer) MoveLeft() {
rLength := runewidth.RuneWidth(r)
if b.DisplayPos%b.LineWidth == 0 {
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width))
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if rLength == 2 {
fmt.Print(CursorLeft)
}
@@ -74,7 +74,7 @@ func (b *Buffer) MoveLeft() {
fmt.Print(CursorLeft)
}
} else {
fmt.Print(CursorLeftN(rLength))
fmt.Print(cursorLeftN(rLength))
}
b.Pos -= 1
@@ -115,15 +115,15 @@ func (b *Buffer) MoveRight() {
b.DisplayPos += rLength
if b.DisplayPos%b.LineWidth == 0 {
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
} else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())+rLength))
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength))
b.DisplayPos += 1
} else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
b.DisplayPos += 1
} else {
fmt.Print(CursorRightN(rLength))
fmt.Print(cursorRightN(rLength))
}
}
}
@@ -154,7 +154,7 @@ func (b *Buffer) MoveToStart() {
fmt.Print(CursorUp)
}
}
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())))
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())))
b.Pos = 0
b.DisplayPos = 0
}
@@ -169,9 +169,9 @@ func (b *Buffer) MoveToEnd() {
fmt.Print(CursorDown)
}
remainder := b.DisplaySize() % b.LineWidth
fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())+remainder))
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder))
} else {
fmt.Print(CursorRightN(b.DisplaySize() - b.DisplayPos))
fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos))
}
b.Pos = b.Buf.Size()
@@ -286,7 +286,8 @@ func (b *Buffer) drawRemaining() {
remLength := runewidth.StringWidth(remainingText)
if len(currLine) > 0 {
fmt.Print(ClearToEOL + currLine + CursorLeftN(currLineSpace))
fmt.Printf(ClearToEOL + currLine)
fmt.Print(cursorLeftN(currLineSpace))
} else {
fmt.Print(ClearToEOL)
}
@@ -300,9 +301,9 @@ func (b *Buffer) drawRemaining() {
}
if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText {
fmt.Print(CursorRightN(currLineSpace))
fmt.Print(cursorRightN(currLineSpace))
fmt.Printf("\n%s", b.Prompt.AltPrompt)
fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width-currLineSpace))
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace))
}
// render the other lines
@@ -332,7 +333,9 @@ func (b *Buffer) drawRemaining() {
lineLength += runewidth.RuneWidth(c)
fmt.Printf("%c", c)
}
fmt.Print(ClearToEOL + CursorUpN(totalLines) + CursorBOL + CursorRightN(b.Width-currLineSpace))
fmt.Print(ClearToEOL)
fmt.Print(cursorUpN(totalLines))
fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace))
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
@@ -354,7 +357,8 @@ func (b *Buffer) Remove() {
if b.DisplayPos%b.LineWidth == 0 {
// if the user backspaces over the word boundary, do this magic to clear the line
// and move to the end of the previous line
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
@@ -366,23 +370,24 @@ func (b *Buffer) Remove() {
}
if rLength == 2 {
fmt.Print(CursorLeft + " " + CursorLeftN(2))
fmt.Print(CursorLeft + " " + cursorLeftN(2))
} else {
fmt.Print(" " + CursorLeft)
}
} else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace {
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.Pos == b.Buf.Size() {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
}
b.DisplayPos -= 1
} else {
fmt.Print(CursorLeftN(rLength))
fmt.Print(cursorLeftN(rLength))
for range rLength {
fmt.Print(" ")
}
fmt.Print(CursorLeftN(rLength))
fmt.Print(cursorLeftN(rLength))
}
var eraseExtraLine bool
@@ -400,9 +405,9 @@ func (b *Buffer) Remove() {
// are trailing characters which go over the line width boundary
if eraseExtraLine {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Print(CursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth
fmt.Print(CursorUpN(remainingLines+1) + CursorRightN(place+len(b.Prompt.prompt())))
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt())))
}
}
}
@@ -417,9 +422,9 @@ func (b *Buffer) Delete() {
if b.DisplaySize()%b.LineWidth == 0 {
if b.DisplayPos != b.DisplaySize() {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Print(CursorDownN(remainingLines) + CursorBOL + ClearToEOL)
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth
fmt.Print(CursorUpN(remainingLines) + CursorRightN(place+len(b.Prompt.prompt())))
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt())))
}
}
}
@@ -466,17 +471,17 @@ func (b *Buffer) DeleteWord() {
}
func (b *Buffer) ClearScreen() {
fmt.Print(ClearScreen + CursorReset + b.Prompt.prompt())
fmt.Printf(ClearScreen + CursorReset + b.Prompt.prompt())
if b.IsEmpty() {
ph := b.Prompt.placeholder()
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
} else {
currPos := b.DisplayPos
currIndex := b.Pos
b.Pos = 0
b.DisplayPos = 0
b.drawRemaining()
fmt.Print(CursorReset + CursorRightN(len(b.Prompt.prompt())))
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt())))
if currPos > 0 {
targetLine := currPos / b.LineWidth
if targetLine > 0 {
@@ -486,10 +491,10 @@ func (b *Buffer) ClearScreen() {
}
remainder := currPos % b.LineWidth
if remainder > 0 {
fmt.Print(CursorRightN(remainder))
fmt.Print(cursorRightN(remainder))
}
if currPos%b.LineWidth == 0 {
fmt.Print(CursorBOL + b.Prompt.AltPrompt)
fmt.Printf(CursorBOL + b.Prompt.AltPrompt)
}
}
b.Pos = currIndex
@@ -508,13 +513,13 @@ func (b *Buffer) Replace(r []rune) {
b.Buf.Clear()
fmt.Print(CursorBOL + ClearToEOL)
fmt.Printf(CursorBOL + ClearToEOL)
for range lineNums {
fmt.Print(CursorUp + CursorBOL + ClearToEOL)
}
fmt.Print(CursorBOL + b.Prompt.prompt())
fmt.Printf(CursorBOL + b.Prompt.prompt())
for _, c := range r {
b.Add(c)
@@ -540,3 +545,19 @@ func (b *Buffer) StringNM(n, m int) string {
}
return s
}
// cursorLeftN returns the terminal escape sequence obtained by formatting the
// CursorLeftN format string ("\033[%dD") with n.
func cursorLeftN(n int) string {
return fmt.Sprintf(CursorLeftN, n)
}
// cursorRightN returns the terminal escape sequence obtained by formatting the
// CursorRightN format string ("\033[%dC") with n.
func cursorRightN(n int) string {
return fmt.Sprintf(CursorRightN, n)
}
// cursorUpN returns the terminal escape sequence obtained by formatting the
// CursorUpN format string ("\033[%dA") with n.
func cursorUpN(n int) string {
return fmt.Sprintf(CursorUpN, n)
}
// cursorDownN returns the terminal escape sequence obtained by formatting the
// CursorDownN format string ("\033[%dB") with n.
func cursorDownN(n int) string {
return fmt.Sprintf(CursorDownN, n)
}

View File

@@ -98,7 +98,7 @@ func (i *Instance) Readline() (string, error) {
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder {
ph := i.Prompt.placeholder()
fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
}
r, err := i.Terminal.Read()

View File

@@ -1,7 +1,5 @@
package readline
import "strconv"
const (
CharNull = 0
CharLineStart = 1
@@ -43,49 +41,34 @@ const (
)
const (
Esc = "\x1b"
CursorUp = "\033[1A"
CursorDown = "\033[1B"
CursorRight = "\033[1C"
CursorLeft = "\033[1D"
CursorSave = Esc + "[s"
CursorRestore = Esc + "[u"
CursorSave = "\033[s"
CursorRestore = "\033[u"
CursorEOL = Esc + "[E"
CursorBOL = Esc + "[1G"
CursorHide = Esc + "[?25l"
CursorShow = Esc + "[?25h"
CursorUpN = "\033[%dA"
CursorDownN = "\033[%dB"
CursorRightN = "\033[%dC"
CursorLeftN = "\033[%dD"
ClearToEOL = Esc + "[K"
ClearLine = Esc + "[2K"
ClearScreen = Esc + "[2J"
CursorReset = Esc + "[0;0f"
CursorEOL = "\033[E"
CursorBOL = "\033[1G"
CursorHide = "\033[?25l"
CursorShow = "\033[?25h"
ColorGrey = Esc + "[38;5;245m"
ColorDefault = Esc + "[0m"
ClearToEOL = "\033[K"
ClearLine = "\033[2K"
ClearScreen = "\033[2J"
CursorReset = "\033[0;0f"
StartBracketedPaste = Esc + "[?2004h"
EndBracketedPaste = Esc + "[?2004l"
)
ColorGrey = "\033[38;5;245m"
ColorDefault = "\033[0m"
func CursorUpN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "A"
}
func CursorDownN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "B"
}
func CursorRightN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "C"
}
func CursorLeftN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "D"
}
var (
CursorUp = CursorUpN(1)
CursorDown = CursorDownN(1)
CursorRight = CursorRightN(1)
CursorLeft = CursorLeftN(1)
StartBracketedPaste = "\033[?2004h"
EndBracketedPaste = "\033[?2004l"
)
const (

View File

@@ -94,7 +94,7 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
}
const (
numDownloadParts = 16
numDownloadParts = 64
minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte
)

View File

@@ -215,20 +215,25 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
return nil, "", err
}
f, err := os.Open(fp)
if _, err = os.Stat(fp); err != nil {
return nil, "", err
}
var manifest *Manifest
bts, err := os.ReadFile(fp)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("couldn't open file '%s'", fp)
}
defer f.Close()
sha256sum := sha256.New()
shaSum := sha256.Sum256(bts)
shaStr := hex.EncodeToString(shaSum[:])
var manifest Manifest
if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
if err := json.Unmarshal(bts, &manifest); err != nil {
return nil, "", err
}
return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
return manifest, shaStr, nil
}
func GetModel(name string) (*Model, error) {
@@ -687,18 +692,43 @@ func CopyModel(src, dst model.Name) error {
return err
}
func deleteUnusedLayers(deleteMap map[string]struct{}) error {
manifests, err := Manifests()
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}) error {
fp, err := GetManifestPath()
if err != nil {
return err
}
for _, manifest := range manifests {
walkFunc := func(path string, info os.FileInfo, _ error) error {
if info.IsDir() {
return nil
}
dir, file := filepath.Split(path)
dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
tag := strings.Join([]string{dir, file}, ":")
fmp := ParseModelPath(tag)
// skip the manifest we're trying to delete
if skipModelPath != nil && skipModelPath.GetFullTagname() == fmp.GetFullTagname() {
return nil
}
// save (i.e. delete from the deleteMap) any files used in other manifests
manifest, _, err := GetManifest(fmp)
if err != nil {
return err
}
for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest)
}
delete(deleteMap, manifest.Config.Digest)
return nil
}
if err := filepath.Walk(fp, walkFunc); err != nil {
return err
}
// only delete the files which are still in the deleteMap
@@ -751,7 +781,8 @@ func PruneLayers() error {
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap)))
if err := deleteUnusedLayers(deleteMap); err != nil {
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return nil
}
@@ -846,19 +877,26 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
mp := ParseModelPath(name)
var manifest *Manifest
var err error
var noprune string
// build deleteMap to prune unused layers
deleteMap := make(map[string]struct{})
manifest, _, err := GetManifest(mp)
if errors.Is(err, os.ErrNotExist) {
// noop
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
} else {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
if !envconfig.NoPrune() {
manifest, _, err = GetManifest(mp)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
if manifest.Config.Digest != "" {
deleteMap[manifest.Config.Digest] = struct{}{}
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
}
if manifest.Config.Digest != "" {
deleteMap[manifest.Config.Digest] = struct{}{}
}
}
}
@@ -937,9 +975,11 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return err
}
if !envconfig.NoPrune() && len(deleteMap) > 0 {
fn(api.ProgressResponse{Status: "removing unused layers"})
if err := deleteUnusedLayers(deleteMap); err != nil {
if noprune == "" {
fn(api.ProgressResponse{Status: "removing any unused layers"})
err = deleteUnusedLayers(nil, deleteMap)
if err != nil {
slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
}
}
@@ -960,12 +1000,12 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptio
}
defer resp.Body.Close()
var m Manifest
var m *Manifest
if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
return nil, err
}
return &m, err
return m, err
}
// GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer

View File

@@ -5,7 +5,6 @@ import (
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"os"
@@ -151,16 +150,14 @@ func Manifests() (map[model.Name]*Manifest, error) {
n := model.ParseNameFromFilepath(rel)
if !n.IsValid() {
slog.Warn("bad manifest name", "path", rel)
slog.Warn("bad manifest name", "path", rel, "error", err)
continue
}
m, err := ParseNamedManifest(n)
if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
if err != nil {
slog.Warn("bad manifest", "name", n, "error", err)
continue
} else if err != nil {
return nil, fmt.Errorf("%s: %w", n, err)
}
ms[n] = m

View File

@@ -176,20 +176,9 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
mediatype = "application/vnd.ollama.image.projector"
}
var layer Layer
if digest != "" && n == stat.Size() && offset == 0 {
layer, err = NewLayerFromLayer(digest, mediatype, file.Name())
if err != nil {
slog.Debug("could not create new layer from layer", "error", err)
}
}
// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
if layer.Digest == "" {
layer, err = NewLayer(io.NewSectionReader(file, offset, n), mediatype)
if err != nil {
return nil, err
}
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
if err != nil {
return nil, err
}
layers = append(layers, &layerGGML{layer, ggml})

View File

@@ -2,10 +2,8 @@ package server
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"testing"
@@ -13,7 +11,6 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/template"
)
@@ -136,82 +133,3 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
})
}
}
// TestParseFromFileFromLayer writes a single GGUF model to a temp file and
// runs parseFromFile over it twice: first with an empty digest, then with
// the digest reported by the first pass. Each pass must yield exactly one
// layer, and the two layers must agree on digest, size, and media type.
func TestParseFromFileFromLayer(t *testing.T) {
	f, err := os.CreateTemp(t.TempDir(), "")
	if err != nil {
		t.Fatalf("failed to open file: %v", err)
	}
	defer f.Close()

	kv := llm.KV{"general.architecture": "gemma"}
	if err := llm.WriteGGUF(f, kv, []llm.Tensor{}); err != nil {
		t.Fatalf("failed to write gguf: %v", err)
	}

	ctx := context.Background()
	discard := func(api.ProgressResponse) {}

	// First pass: no digest supplied.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		t.Fatalf("failed to seek to start: %v", err)
	}

	layers, err := parseFromFile(ctx, f, "", discard)
	if err != nil {
		t.Fatalf("failed to parse from file: %v", err)
	}

	if len(layers) != 1 {
		t.Fatalf("got %d != want 1", len(layers))
	}

	// Second pass: rewind and hand back the digest from the first pass.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		t.Fatalf("failed to seek to start: %v", err)
	}

	layers2, err := parseFromFile(ctx, f, layers[0].Digest, discard)
	if err != nil {
		t.Fatalf("failed to parse from file: %v", err)
	}

	if len(layers2) != 1 {
		t.Fatalf("got %d != want 1", len(layers2))
	}

	first, second := layers[0], layers2[0]
	switch {
	case first.Digest != second.Digest:
		t.Fatalf("got %s != want %s", first.Digest, second.Digest)
	case first.Size != second.Size:
		t.Fatalf("got %d != want %d", first.Size, second.Size)
	case first.MediaType != second.MediaType:
		t.Fatalf("got %v != want %v", first.MediaType, second.MediaType)
	}
}
func TestParseLayerFromCopy(t *testing.T) {
tempModels := t.TempDir()
file2, err := os.CreateTemp(tempModels, "")
if err != nil {
t.Fatalf("failed to open file: %v", err)
}
defer file2.Close()
for range 5 {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
}
if _, err := file2.Seek(0, io.SeekStart); err != nil {
t.Fatalf("failed to seek to start: %v", err)
}
layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
if len(layers) != 5 {
t.Fatalf("got %d != want 5", len(layers))
}
}

View File

@@ -324,10 +324,13 @@ func (s *Server) EmbedHandler(c *gin.Context) {
input = append(input, v.(string))
}
default:
if req.Input != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
return
}
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
return
}
if len(input) == 0 {
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
return
}
r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
@@ -338,11 +341,6 @@ func (s *Server) EmbedHandler(c *gin.Context) {
checkpointLoaded := time.Now()
if len(input) == 0 {
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
return
}
kvData, err := getKVData(m.ModelPath, false)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -1045,6 +1043,11 @@ func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
if addr, err := netip.ParseAddr(host); err == nil {
if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
if c.Request.Method == http.MethodOptions {
c.AbortWithStatus(http.StatusNoContent)
return
}
c.Next()
return
}
@@ -1076,6 +1079,7 @@ func (s *Server) GenerateRoutes() http.Handler {
config.AllowOrigins = envconfig.Origins()
r := gin.Default()
r.HandleMethodNotAllowed = true
r.Use(
cors.New(config),
allowedHostsMiddleware(s.addr),

View File

@@ -272,6 +272,76 @@ func Test_Routes(t *testing.T) {
assert.Equal(t, "library", retrieveResp.OwnedBy)
},
},
{
Name: "Embed Handler Empty Input",
Method: http.MethodPost,
Path: "/api/embed",
Setup: func(t *testing.T, req *http.Request) {
embedReq := api.EmbedRequest{
Model: "t-bone",
Input: "",
}
jsonData, err := json.Marshal(embedReq)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
if contentType != "application/json; charset=utf-8" {
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
var embedResp api.EmbedResponse
err = json.Unmarshal(body, &embedResp)
if err != nil {
t.Fatal(err)
}
if embedResp.Model != "t-bone" {
t.Fatalf("expected model t-bone, got %s", embedResp.Model)
}
if embedResp.Embeddings == nil {
t.Fatalf("expected embeddings to not be nil, got %v", embedResp.Embeddings)
}
if len(embedResp.Embeddings) != 0 {
t.Fatalf("expected embeddings to be empty, got %v", embedResp.Embeddings)
}
},
},
{
Name: "Embed Handler Invalid Input",
Method: http.MethodPost,
Path: "/api/embed",
Setup: func(t *testing.T, req *http.Request) {
embedReq := api.EmbedRequest{
Model: "t-bone",
Input: 2,
}
jsonData, err := json.Marshal(embedReq)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
if contentType != "application/json; charset=utf-8" {
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
}
_, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
if resp.StatusCode != http.StatusBadRequest {
t.Fatalf("expected status code 400, got %d", resp.StatusCode)
}
},
},
}
t.Setenv("OLLAMA_MODELS", t.TempDir())

View File

@@ -418,7 +418,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList,
// some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to
// check for model compatibility
if errors.Is(err, llm.ErrUnsupportedFormat) || strings.Contains(err.Error(), "failed to load model") {
if errors.Is(llm.ErrUnsupportedFormat, err) || strings.Contains(err.Error(), "failed to load model") {
err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, req.model.ShortName)
}
slog.Info("NewLlamaServer failed", "model", req.model.ModelPath, "error", err)

View File

@@ -219,7 +219,7 @@ func (n Name) String() string {
return b.String()
}
// DisplayShortest returns a short string version of the name.
// DisplayShort returns a short string version of the name.
func (n Name) DisplayShortest() string {
var sb strings.Builder