mirror of
https://github.com/ollama/ollama.git
synced 2026-01-01 12:08:20 -05:00
Compare commits
48 Commits
royh-embed
...
jyan/quant
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8476ef2bd8 | ||
|
|
4c9a160a08 | ||
|
|
657a1102fc | ||
|
|
d352c68ffc | ||
|
|
d82d25d70c | ||
|
|
60be9e2840 | ||
|
|
a083852eb5 | ||
|
|
6bab0e2368 | ||
|
|
c4cccaf936 | ||
|
|
9fe5c393e4 | ||
|
|
007c988dba | ||
|
|
91d21e7c7b | ||
|
|
3e64284f69 | ||
|
|
39910f2ab2 | ||
|
|
96d0cd92f2 | ||
|
|
3a724a7c80 | ||
|
|
f520f0056e | ||
|
|
d25f85ede4 | ||
|
|
b48420b74b | ||
|
|
784958a1cb | ||
|
|
ae65cc8dea | ||
|
|
a037528bba | ||
|
|
04bf41deb5 | ||
|
|
c23cec9547 | ||
|
|
8377dc48d0 | ||
|
|
3aee405dfa | ||
|
|
9b3f47b674 | ||
|
|
f5441f01a2 | ||
|
|
ab165df43a | ||
|
|
79cc4c9585 | ||
|
|
bc3f59a6ad | ||
|
|
1a85cb904c | ||
|
|
10ea0987e9 | ||
|
|
413d368a6a | ||
|
|
cabf375059 | ||
|
|
ca0ee1d4fe | ||
|
|
1142999aab | ||
|
|
0d5a72aba9 | ||
|
|
ea837412c2 | ||
|
|
736ad6f438 | ||
|
|
64607d16a5 | ||
|
|
a6cfe7f00b | ||
|
|
c3b411a515 | ||
|
|
928f37e3ae | ||
|
|
2d1e3c3229 | ||
|
|
4918fae535 | ||
|
|
0aff67877e | ||
|
|
0bacb30007 |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +1,2 @@
|
||||
llm/ext_server/* linguist-vendored
|
||||
llm/*.h linguist-vendored
|
||||
@@ -17,14 +17,20 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/version"
|
||||
@@ -347,16 +353,7 @@ func (c *Client) Heartbeat(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Embed generates embeddings from a model.
|
||||
func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
|
||||
var resp EmbedResponse
|
||||
if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
// Embeddings generates embeddings from a model. (Legacy)
|
||||
// Embeddings generates embeddings from a model.
|
||||
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
|
||||
var resp EmbeddingResponse
|
||||
if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
|
||||
@@ -383,3 +380,27 @@ func (c *Client) Version(ctx context.Context) (string, error) {
|
||||
|
||||
return version.Version, nil
|
||||
}
|
||||
|
||||
func Authorization(ctx context.Context, request *http.Request) (string, error) {
|
||||
data := []byte(fmt.Sprintf("%s,%s,%d", request.Method, request.URL.RequestURI(), time.Now().Unix()))
|
||||
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
knownHostsFile, err := os.OpenFile(filepath.Join(home, ".ollama", "known_hosts"), os.O_CREATE|os.O_RDWR|os.O_APPEND, 0600)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer knownHostsFile.Close()
|
||||
|
||||
token, err := auth.Sign(ctx, data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// interleave request data into the token
|
||||
key, sig, _ := strings.Cut(token, ":")
|
||||
return fmt.Sprintf("%s:%s:%s", key, base64.StdEncoding.EncodeToString(data), sig), nil
|
||||
}
|
||||
|
||||
25
api/types.go
25
api/types.go
@@ -173,30 +173,6 @@ type Runner struct {
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
}
|
||||
|
||||
// EmbedRequest is the request passed to [Client.Embed].
|
||||
type EmbedRequest struct {
|
||||
// Model is the model name.
|
||||
Model string `json:"model"`
|
||||
|
||||
// Input is the input to embed.
|
||||
Input any `json:"input"`
|
||||
|
||||
// KeepAlive controls how long the model will stay loaded in memory following
|
||||
// this request.
|
||||
KeepAlive *Duration `json:"keep_alive,omitempty"`
|
||||
|
||||
Truncate *bool `json:"truncate,omitempty"`
|
||||
|
||||
// Options lists model-specific options.
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
|
||||
// EmbedResponse is the response from [Client.Embed].
|
||||
type EmbedResponse struct {
|
||||
Model string `json:"model"`
|
||||
Embeddings [][]float32 `json:"embeddings,omitempty"`
|
||||
}
|
||||
|
||||
// EmbeddingRequest is the request passed to [Client.Embeddings].
|
||||
type EmbeddingRequest struct {
|
||||
// Model is the model name.
|
||||
@@ -291,6 +267,7 @@ type PullRequest struct {
|
||||
type ProgressResponse struct {
|
||||
Status string `json:"status"`
|
||||
Digest string `json:"digest,omitempty"`
|
||||
Quantize string `json:"quantize,omitempty"`
|
||||
Total int64 `json:"total,omitempty"`
|
||||
Completed int64 `json:"completed,omitempty"`
|
||||
}
|
||||
|
||||
54
auth/auth.go
54
auth/auth.go
@@ -10,42 +10,37 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
const defaultPrivateKey = "id_ed25519"
|
||||
|
||||
func keyPath() (string, error) {
|
||||
func keyPath() (ssh.Signer, error) {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(home, ".ollama", defaultPrivateKey), nil
|
||||
}
|
||||
|
||||
func GetPublicKey() (string, error) {
|
||||
keyPath, err := keyPath()
|
||||
if err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
keyPath := filepath.Join(home, ".ollama", defaultPrivateKey)
|
||||
privateKeyFile, err := os.ReadFile(keyPath)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
|
||||
return ssh.ParsePrivateKey(privateKeyFile)
|
||||
}
|
||||
|
||||
func GetPublicKey() (ssh.PublicKey, error) {
|
||||
privateKey, err := keyPath()
|
||||
// if privateKey, try public key directly
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
|
||||
return strings.TrimSpace(string(publicKey)), nil
|
||||
return privateKey.PublicKey(), nil
|
||||
}
|
||||
|
||||
func NewNonce(r io.Reader, length int) (string, error) {
|
||||
@@ -58,25 +53,20 @@ func NewNonce(r io.Reader, length int) (string, error) {
|
||||
}
|
||||
|
||||
func Sign(ctx context.Context, bts []byte) (string, error) {
|
||||
keyPath, err := keyPath()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
privateKeyFile, err := os.ReadFile(keyPath)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("Failed to load private key: %v", err))
|
||||
return "", err
|
||||
}
|
||||
|
||||
privateKey, err := ssh.ParsePrivateKey(privateKeyFile)
|
||||
privateKey, err := keyPath()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// get the pubkey, but remove the type
|
||||
publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
|
||||
parts := bytes.Split(publicKey, []byte(" "))
|
||||
publicKey, err := GetPublicKey()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
publicKeyBytes := ssh.MarshalAuthorizedKey(publicKey)
|
||||
|
||||
parts := bytes.Split(publicKeyBytes, []byte(" "))
|
||||
if len(parts) < 2 {
|
||||
return "", fmt.Errorf("malformed public key")
|
||||
}
|
||||
|
||||
180
cmd/cmd.go
180
cmd/cmd.go
@@ -7,6 +7,7 @@ import (
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
"math"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -78,6 +80,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
status := "transferring model data"
|
||||
spinner := progress.NewSpinner(status)
|
||||
p.Add(status, spinner)
|
||||
defer p.Stop()
|
||||
|
||||
for i := range modelfile.Commands {
|
||||
switch modelfile.Commands[i].Name {
|
||||
@@ -112,16 +115,17 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
path = tempfile
|
||||
}
|
||||
|
||||
digest, err := createBlob(cmd, client, path)
|
||||
// spinner.Stop()
|
||||
digest, err := createBlob(cmd, client, path, spinner)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
modelfile.Commands[i].Args = "@" + digest
|
||||
}
|
||||
}
|
||||
|
||||
bars := make(map[string]*progress.Bar)
|
||||
var quantizeSpin *progress.Spinner
|
||||
fn := func(resp api.ProgressResponse) error {
|
||||
if resp.Digest != "" {
|
||||
spinner.Stop()
|
||||
@@ -134,11 +138,20 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
bar.Set(resp.Completed)
|
||||
} else if resp.Quantize != "" {
|
||||
spinner.Stop()
|
||||
|
||||
if quantizeSpin != nil {
|
||||
quantizeSpin.SetMessage(resp.Status)
|
||||
} else {
|
||||
quantizeSpin = progress.NewSpinner(resp.Status)
|
||||
p.Add("quantize", quantizeSpin)
|
||||
}
|
||||
} else if status != resp.Status {
|
||||
spinner.Stop()
|
||||
|
||||
status = resp.Status
|
||||
spinner = progress.NewSpinner(status)
|
||||
spinner := progress.NewSpinner(status)
|
||||
p.Add(status, spinner)
|
||||
}
|
||||
|
||||
@@ -263,13 +276,22 @@ func tempZipFiles(path string) (string, error) {
|
||||
return tempfile.Name(), nil
|
||||
}
|
||||
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
|
||||
var ErrBlobExists = errors.New("blob exists")
|
||||
|
||||
func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
|
||||
bin, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
// Get file info to retrieve the size
|
||||
fileInfo, err := bin.Stat()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
fileSize := fileInfo.Size()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, bin); err != nil {
|
||||
return "", err
|
||||
@@ -279,13 +301,157 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
||||
return "", err
|
||||
}
|
||||
|
||||
var pw progressWriter
|
||||
// Create a progress bar and start a goroutine to update it
|
||||
// JK Let's use a percentage
|
||||
|
||||
//bar := progress.NewBar("transferring model data...", fileSize, 0)
|
||||
//p.Add("transferring model data", bar)
|
||||
|
||||
status := "transferring model data 0%"
|
||||
spinner.SetMessage(status)
|
||||
|
||||
ticker := time.NewTicker(60 * time.Millisecond)
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
go func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n/fileSize)))
|
||||
case <-done:
|
||||
spinner.SetMessage("transferring model data 100%")
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
|
||||
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
|
||||
|
||||
// We check if we can find the models directory locally
|
||||
// If we can, we return the path to the directory
|
||||
// If we can't, we return an error
|
||||
// If the blob exists already, we return the digest
|
||||
dest, err := getLocalPath(cmd.Context(), digest)
|
||||
|
||||
if errors.Is(err, ErrBlobExists) {
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
// Successfully found the model directory
|
||||
if err == nil {
|
||||
// Copy blob in via OS specific copy
|
||||
// Linux errors out to use io.copy
|
||||
err = localCopy(path, dest)
|
||||
if err == nil {
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
// Default copy using io.copy
|
||||
err = defaultCopy(path, dest)
|
||||
if err == nil {
|
||||
return digest, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If at any point copying the blob over locally fails, we default to the copy through the server
|
||||
if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return digest, nil
|
||||
}
|
||||
|
||||
type progressWriter struct {
|
||||
n int64
|
||||
}
|
||||
|
||||
func (w *progressWriter) Write(p []byte) (n int, err error) {
|
||||
w.n += int64(len(p))
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func getLocalPath(ctx context.Context, digest string) (string, error) {
|
||||
ollamaHost := envconfig.Host
|
||||
|
||||
client := http.DefaultClient
|
||||
base := &url.URL{
|
||||
Scheme: ollamaHost.Scheme,
|
||||
Host: net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
|
||||
}
|
||||
|
||||
data, err := json.Marshal(digest)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
reqBody := bytes.NewReader(data)
|
||||
path := fmt.Sprintf("/api/blobs/%s", digest)
|
||||
requestURL := base.JoinPath(path)
|
||||
request, err := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), reqBody)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
authz, err := api.Authorization(ctx, request)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
request.Header.Set("Authorization", authz)
|
||||
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
|
||||
request.Header.Set("X-Redirect-Create", "1")
|
||||
|
||||
resp, err := client.Do(request)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusTemporaryRedirect {
|
||||
dest := resp.Header.Get("LocalLocation")
|
||||
|
||||
return dest, nil
|
||||
}
|
||||
return "", ErrBlobExists
|
||||
}
|
||||
|
||||
func defaultCopy(path string, dest string) error {
|
||||
// This function should be called if the server is local
|
||||
// It should find the model directory, copy the blob over, and return the digest
|
||||
dirPath := filepath.Dir(dest)
|
||||
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy blob over
|
||||
sourceFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open source file: %v", err)
|
||||
}
|
||||
defer sourceFile.Close()
|
||||
|
||||
destFile, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create destination file: %v", err)
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
_, err = io.CopyBuffer(destFile, sourceFile, make([]byte, 4*1024*1024))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error copying file: %v", err)
|
||||
}
|
||||
|
||||
err = destFile.Sync()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error flushing file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
interactive := true
|
||||
|
||||
@@ -379,11 +545,13 @@ func errFromUnknownKey(unknownKeyErr error) error {
|
||||
if len(matches) > 0 {
|
||||
serverPubKey := matches[0]
|
||||
|
||||
localPubKey, err := auth.GetPublicKey()
|
||||
publicKey, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
return unknownKeyErr
|
||||
}
|
||||
|
||||
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(publicKey)))
|
||||
|
||||
if runtime.GOOS == "linux" && serverPubKey != localPubKey {
|
||||
// try the ollama service public key
|
||||
svcPubKey, err := os.ReadFile("/usr/share/ollama/.ollama/id_ed25519.pub")
|
||||
|
||||
23
cmd/copy_darwin.go
Normal file
23
cmd/copy_darwin.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
dirPath := filepath.Dir(target)
|
||||
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err := unix.Clonefile(src, target, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
7
cmd/copy_linux.go
Normal file
7
cmd/copy_linux.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package cmd
|
||||
|
||||
import "errors"
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
return errors.New("no local copy implementation for linux")
|
||||
}
|
||||
67
cmd/copy_windows.go
Normal file
67
cmd/copy_windows.go
Normal file
@@ -0,0 +1,67 @@
|
||||
//go:build windows
|
||||
// +build windows
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func localCopy(src, target string) error {
|
||||
// Create target directory if it doesn't exist
|
||||
dirPath := filepath.Dir(target)
|
||||
if err := os.MkdirAll(dirPath, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Open source file
|
||||
sourceFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer sourceFile.Close()
|
||||
|
||||
// Create target file
|
||||
targetFile, err := os.Create(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer targetFile.Close()
|
||||
|
||||
// Use CopyFileExW to copy the file
|
||||
err = copyFileEx(src, target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyFileEx(src, dst string) error {
|
||||
kernel32 := syscall.NewLazyDLL("kernel32.dll")
|
||||
copyFileEx := kernel32.NewProc("CopyFileExW")
|
||||
|
||||
srcPtr, err := syscall.UTF16PtrFromString(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dstPtr, err := syscall.UTF16PtrFromString(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r1, _, err := copyFileEx.Call(
|
||||
uintptr(unsafe.Pointer(srcPtr)),
|
||||
uintptr(unsafe.Pointer(dstPtr)),
|
||||
0, 0, 0, 0)
|
||||
|
||||
if r1 == 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,152 +0,0 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestAllMiniLMEmbed(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
req := api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: "why is the sky blue?",
|
||||
}
|
||||
|
||||
res, err := embedTestHelper(ctx, t, req)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
if len(res.Embeddings) != 1 {
|
||||
t.Fatalf("expected 1 embedding, got %d", len(res.Embeddings))
|
||||
}
|
||||
|
||||
if len(res.Embeddings[0]) != 384 {
|
||||
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
|
||||
}
|
||||
|
||||
if res.Embeddings[0][0] != 0.010071031 {
|
||||
t.Fatalf("expected 0.010071031, got %f", res.Embeddings[0][0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllMiniLMBatchEmbed(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
req := api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: []string{"why is the sky blue?", "why is the grass green?"},
|
||||
}
|
||||
|
||||
res, err := embedTestHelper(ctx, t, req)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
|
||||
if len(res.Embeddings) != 2 {
|
||||
t.Fatalf("expected 2 embeddings, got %d", len(res.Embeddings))
|
||||
}
|
||||
|
||||
if len(res.Embeddings[0]) != 384 {
|
||||
t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
|
||||
}
|
||||
|
||||
if res.Embeddings[0][0] != 0.010071031 || res.Embeddings[1][0] != -0.009802706 {
|
||||
t.Fatalf("expected 0.010071031 and -0.009802706, got %f and %f", res.Embeddings[0][0], res.Embeddings[1][0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllMiniLmEmbedTruncate(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
truncTrue, truncFalse := true, false
|
||||
|
||||
type testReq struct {
|
||||
Name string
|
||||
Request api.EmbedRequest
|
||||
}
|
||||
|
||||
reqs := []testReq{
|
||||
{
|
||||
Name: "Target Truncation",
|
||||
Request: api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: "why",
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Default Truncate",
|
||||
Request: api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: "why is the sky blue?",
|
||||
Options: map[string]any{"num_ctx": 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Explicit Truncate",
|
||||
Request: api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: "why is the sky blue?",
|
||||
Truncate: &truncTrue,
|
||||
Options: map[string]any{"num_ctx": 1},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
res := make(map[string]*api.EmbedResponse)
|
||||
|
||||
for _, req := range reqs {
|
||||
response, err := embedTestHelper(ctx, t, req.Request)
|
||||
if err != nil {
|
||||
t.Fatalf("error: %v", err)
|
||||
}
|
||||
res[req.Name] = response
|
||||
}
|
||||
|
||||
if res["Target Truncation"].Embeddings[0][0] != res["Default Truncate"].Embeddings[0][0] {
|
||||
t.Fatal("expected default request to truncate correctly")
|
||||
}
|
||||
|
||||
if res["Default Truncate"].Embeddings[0][0] != res["Explicit Truncate"].Embeddings[0][0] {
|
||||
t.Fatal("expected default request and truncate true request to be the same")
|
||||
}
|
||||
|
||||
// check that truncate set to false returns an error if context length is exceeded
|
||||
_, err := embedTestHelper(ctx, t, api.EmbedRequest{
|
||||
Model: "all-minilm",
|
||||
Input: "why is the sky blue?",
|
||||
Truncate: &truncFalse,
|
||||
Options: map[string]any{"num_ctx": 1},
|
||||
})
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("failed to pull model %s: %v", req.Model, err)
|
||||
}
|
||||
|
||||
response, err := client.Embed(ctx, &req)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
37
llm/ext_server/server.cpp
vendored
37
llm/ext_server/server.cpp
vendored
@@ -3188,33 +3188,26 @@ int main(int argc, char **argv) {
|
||||
prompt = "";
|
||||
}
|
||||
|
||||
if (prompt.size() == 1) {
|
||||
prompt = prompt[0];
|
||||
json image_data;
|
||||
if (body.count("image_data") != 0) {
|
||||
image_data = body["image_data"];
|
||||
}
|
||||
else
|
||||
{
|
||||
image_data = "";
|
||||
}
|
||||
|
||||
// create and queue the task
|
||||
json responses;
|
||||
{
|
||||
const int id_task = llama.queue_tasks.get_new_id();
|
||||
llama.queue_results.add_waiting_task_id(id_task);
|
||||
llama.request_completion(id_task, {{"prompt", prompt}}, true, -1);
|
||||
const int task_id = llama.queue_tasks.get_new_id();
|
||||
llama.queue_results.add_waiting_task_id(task_id);
|
||||
llama.request_completion(task_id, { {"prompt", prompt}, { "n_predict", 0}, {"image_data", image_data} }, true, -1);
|
||||
|
||||
// get the result
|
||||
task_result result = llama.queue_results.recv(id_task);
|
||||
llama.queue_results.remove_waiting_task_id(id_task);
|
||||
if (result.error) {
|
||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||
}
|
||||
// get the result
|
||||
task_result result = llama.queue_results.recv(task_id);
|
||||
llama.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
responses = result.result_json.value("results", std::vector<json>{result.result_json});
|
||||
json embeddings = json::array();
|
||||
for (auto & elem : responses) {
|
||||
embeddings.push_back(elem.at("embedding"));
|
||||
}
|
||||
// send the result
|
||||
json embedding_res = json{{"embedding", embeddings}};
|
||||
return res.set_content(embedding_res.dump(), "application/json; charset=utf-8");
|
||||
}
|
||||
// send the result
|
||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?
|
||||
|
||||
@@ -254,7 +254,7 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
|
||||
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
|
||||
fi
|
||||
init_vars
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
|
||||
if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
|
||||
echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
|
||||
|
||||
@@ -366,6 +366,7 @@ function build_rocm() {
|
||||
"-DCMAKE_C_COMPILER=clang.exe",
|
||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||
"-DGGML_HIPBLAS=on",
|
||||
"-DLLAMA_CUDA_NO_PEER_COPY=on",
|
||||
"-DHIP_PLATFORM=amd",
|
||||
"-DGGML_AVX=on",
|
||||
"-DGGML_AVX2=off",
|
||||
|
||||
1227
llm/llama.h
vendored
Normal file
1227
llm/llama.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
51
llm/llm.go
51
llm/llm.go
@@ -1,6 +1,6 @@
|
||||
package llm
|
||||
|
||||
// #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
|
||||
// #cgo CPPFLAGS: -Illama.cpp/ggml/include
|
||||
// #cgo LDFLAGS: -lllama -lggml -lstdc++ -lpthread
|
||||
// #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/build/darwin/arm64_static -L${SRCDIR}/build/darwin/arm64_static/src -L${SRCDIR}/build/darwin/arm64_static/ggml/src -framework Accelerate -framework Metal
|
||||
// #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/darwin/x86_64_static -L${SRCDIR}/build/darwin/x86_64_static/src -L${SRCDIR}/build/darwin/x86_64_static/ggml/src
|
||||
@@ -9,11 +9,22 @@ package llm
|
||||
// #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux/x86_64_static -L${SRCDIR}/build/linux/x86_64_static/src -L${SRCDIR}/build/linux/x86_64_static/ggml/src
|
||||
// #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux/arm64_static -L${SRCDIR}/build/linux/arm64_static/src -L${SRCDIR}/build/linux/arm64_static/ggml/src
|
||||
// #include <stdlib.h>
|
||||
// #include <stdatomic.h>
|
||||
// #include "llama.h"
|
||||
// bool update_quantize_progress(float progress, void* data) {
|
||||
// atomic_int* atomicData = (atomic_int*)data;
|
||||
// int intProgress = *((int*)&progress);
|
||||
// atomic_store(atomicData, intProgress);
|
||||
// return true;
|
||||
// }
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// SystemInfo is an unused example of calling llama.cpp functions using CGo
|
||||
@@ -21,7 +32,7 @@ func SystemInfo() string {
|
||||
return C.GoString(C.llama_print_system_info())
|
||||
}
|
||||
|
||||
func Quantize(infile, outfile string, ftype fileType) error {
|
||||
func Quantize(infile, outfile string, ftype fileType, fn func(resp api.ProgressResponse), tensorCount int) error {
|
||||
cinfile := C.CString(infile)
|
||||
defer C.free(unsafe.Pointer(cinfile))
|
||||
|
||||
@@ -32,6 +43,42 @@ func Quantize(infile, outfile string, ftype fileType) error {
|
||||
params.nthread = -1
|
||||
params.ftype = ftype.Value()
|
||||
|
||||
// Initialize "global" to store progress
|
||||
store := (*int32)(C.malloc(C.sizeof_int))
|
||||
defer C.free(unsafe.Pointer(store))
|
||||
|
||||
// Initialize store value, e.g., setting initial progress to 0
|
||||
atomic.StoreInt32(store, 0)
|
||||
|
||||
params.quantize_callback_data = unsafe.Pointer(store)
|
||||
params.quantize_callback = (C.llama_progress_callback)(C.update_quantize_progress)
|
||||
|
||||
ticker := time.NewTicker(30 * time.Millisecond)
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
go func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
progressInt := atomic.LoadInt32(store)
|
||||
progress := *(*float32)(unsafe.Pointer(&progressInt))
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("quantizing model tensors %d/%d", int(progress), tensorCount),
|
||||
Quantize: "quant",
|
||||
})
|
||||
fmt.Println("Progress: ", progress)
|
||||
case <-done:
|
||||
fn(api.ProgressResponse{
|
||||
Status: fmt.Sprintf("quantizing model tensors %d/%d", tensorCount, tensorCount),
|
||||
Quantize: "quant",
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if rc := C.llama_model_quantize(cinfile, coutfile, ¶ms); rc != 0 {
|
||||
return fmt.Errorf("llama_model_quantize: %d", rc)
|
||||
}
|
||||
|
||||
52
llm/patches/10-quantize-callback.diff
Normal file
52
llm/patches/10-quantize-callback.diff
Normal file
@@ -0,0 +1,52 @@
|
||||
From ed941590d59fc07b1ad21d6aa458588e47d1e446 Mon Sep 17 00:00:00 2001
|
||||
From: Josh Yan <jyan00017@gmail.com>
|
||||
Date: Wed, 10 Jul 2024 13:39:39 -0700
|
||||
Subject: [PATCH] quantize progress
|
||||
|
||||
---
|
||||
include/llama.h | 3 +++
|
||||
src/llama.cpp | 8 ++++++++
|
||||
2 files changed, 11 insertions(+)
|
||||
|
||||
diff --git a/include/llama.h b/include/llama.h
|
||||
index bb4b05ba..613db68e 100644
|
||||
--- a/include/llama.h
|
||||
+++ b/include/llama.h
|
||||
@@ -349,6 +349,9 @@ extern "C" {
|
||||
bool keep_split; // quantize to the same number of shards
|
||||
void * imatrix; // pointer to importance matrix data
|
||||
void * kv_overrides; // pointer to vector containing overrides
|
||||
+
|
||||
+ llama_progress_callback quantize_callback; // callback to report quantization progress
|
||||
+ void * quantize_callback_data; // user data for the callback
|
||||
} llama_model_quantize_params;
|
||||
|
||||
// grammar types
|
||||
diff --git a/src/llama.cpp b/src/llama.cpp
|
||||
index 2b9ace28..ac640c02 100644
|
||||
--- a/src/llama.cpp
|
||||
+++ b/src/llama.cpp
|
||||
@@ -18252,6 +18252,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||
const auto tn = LLM_TN(model.arch);
|
||||
new_ofstream(0);
|
||||
for (int i = 0; i < ml.n_tensors; ++i) {
|
||||
+ if (params->quantize_callback){
|
||||
+ if (!params->quantize_callback(i, params->quantize_callback_data)) {
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
auto weight = ml.get_weight(i);
|
||||
struct ggml_tensor * tensor = weight->tensor;
|
||||
if (weight->idx != cur_split && params->keep_split) {
|
||||
@@ -18789,6 +18795,8 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
|
||||
/*.keep_split =*/ false,
|
||||
/*.imatrix =*/ nullptr,
|
||||
/*.kv_overrides =*/ nullptr,
|
||||
+ /*.quantize_callback =*/ nullptr,
|
||||
+ /*.quantize_callback_data =*/ nullptr,
|
||||
};
|
||||
|
||||
return result;
|
||||
--
|
||||
2.39.3 (Apple Git-146)
|
||||
@@ -33,7 +33,7 @@ type LlamaServer interface {
|
||||
Ping(ctx context.Context) error
|
||||
WaitUntilRunning(ctx context.Context) error
|
||||
Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
|
||||
Embed(ctx context.Context, input []string) ([][]float32, error)
|
||||
Embedding(ctx context.Context, prompt string) ([]float64, error)
|
||||
Tokenize(ctx context.Context, content string) ([]int, error)
|
||||
Detokenize(ctx context.Context, tokens []int) (string, error)
|
||||
Close() error
|
||||
@@ -859,15 +859,15 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||
return nil
|
||||
}
|
||||
|
||||
type EmbedRequest struct {
|
||||
Content []string `json:"content"`
|
||||
type EmbeddingRequest struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
type EmbedResponse struct {
|
||||
Embedding [][]float32 `json:"embedding"`
|
||||
type EmbeddingResponse struct {
|
||||
Embedding []float64 `json:"embedding"`
|
||||
}
|
||||
|
||||
func (s *llmServer) Embed(ctx context.Context, input []string) ([][]float32, error) {
|
||||
func (s *llmServer) Embedding(ctx context.Context, prompt string) ([]float64, error) {
|
||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||
slog.Error("Failed to acquire semaphore", "error", err)
|
||||
return nil, err
|
||||
@@ -882,7 +882,7 @@ func (s *llmServer) Embed(ctx context.Context, input []string) ([][]float32, err
|
||||
return nil, fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||
}
|
||||
|
||||
data, err := json.Marshal(EmbedRequest{Content: input})
|
||||
data, err := json.Marshal(TokenizeRequest{Content: prompt})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error marshaling embed data: %w", err)
|
||||
}
|
||||
@@ -909,7 +909,7 @@ func (s *llmServer) Embed(ctx context.Context, input []string) ([][]float32, err
|
||||
return nil, fmt.Errorf("%s", body)
|
||||
}
|
||||
|
||||
var embedding EmbedResponse
|
||||
var embedding EmbeddingResponse
|
||||
if err := json.Unmarshal(body, &embedding); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
|
||||
}
|
||||
|
||||
@@ -338,12 +338,16 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
|
||||
switch stop := r.Stop.(type) {
|
||||
case string:
|
||||
options["stop"] = []string{stop}
|
||||
case []string:
|
||||
options["stop"] = stop
|
||||
default:
|
||||
if r.Stop != nil {
|
||||
return api.GenerateRequest{}, fmt.Errorf("invalid type for 'stop' field: %T", r.Stop)
|
||||
case []any:
|
||||
var stops []string
|
||||
for _, s := range stop {
|
||||
if str, ok := s.(string); ok {
|
||||
stops = append(stops, str)
|
||||
} else {
|
||||
return api.GenerateRequest{}, fmt.Errorf("invalid type for 'stop' field: %T", s)
|
||||
}
|
||||
}
|
||||
options["stop"] = stops
|
||||
}
|
||||
|
||||
if r.MaxTokens != nil {
|
||||
|
||||
@@ -3,7 +3,6 @@ package openai
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
@@ -16,7 +15,133 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestMiddleware(t *testing.T) {
|
||||
func TestMiddlewareRequests(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Method string
|
||||
Path string
|
||||
Handler func() gin.HandlerFunc
|
||||
Setup func(t *testing.T, req *http.Request)
|
||||
Expected func(t *testing.T, req *http.Request)
|
||||
}
|
||||
|
||||
var capturedRequest *http.Request
|
||||
|
||||
captureRequestMiddleware := func() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
bodyBytes, _ := io.ReadAll(c.Request.Body)
|
||||
c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
capturedRequest = c.Request
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "chat handler",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/chat",
|
||||
Handler: ChatMiddleware,
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{{Role: "user", Content: "Hello"}},
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
},
|
||||
Expected: func(t *testing.T, req *http.Request) {
|
||||
var chatReq api.ChatRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&chatReq); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if chatReq.Messages[0].Role != "user" {
|
||||
t.Fatalf("expected 'user', got %s", chatReq.Messages[0].Role)
|
||||
}
|
||||
|
||||
if chatReq.Messages[0].Content != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", chatReq.Messages[0].Content)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "completions handler",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/generate",
|
||||
Handler: CompletionsMiddleware,
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
temp := float32(0.8)
|
||||
body := CompletionRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
Temperature: &temp,
|
||||
Stop: []string{"\n", "stop"},
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
},
|
||||
Expected: func(t *testing.T, req *http.Request) {
|
||||
var genReq api.GenerateRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&genReq); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if genReq.Prompt != "Hello" {
|
||||
t.Fatalf("expected 'Hello', got %s", genReq.Prompt)
|
||||
}
|
||||
|
||||
if genReq.Options["temperature"] != 1.6 {
|
||||
t.Fatalf("expected 1.6, got %f", genReq.Options["temperature"])
|
||||
}
|
||||
|
||||
stopTokens, ok := genReq.Options["stop"].([]any)
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("expected stop tokens to be a list")
|
||||
}
|
||||
|
||||
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
|
||||
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
router := gin.New()
|
||||
|
||||
endpoint := func(c *gin.Context) {
|
||||
c.Status(http.StatusOK)
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
router = gin.New()
|
||||
router.Use(captureRequestMiddleware())
|
||||
router.Use(tc.Handler())
|
||||
router.Handle(tc.Method, tc.Path, endpoint)
|
||||
req, _ := http.NewRequest(tc.Method, tc.Path, nil)
|
||||
|
||||
if tc.Setup != nil {
|
||||
tc.Setup(t, req)
|
||||
}
|
||||
|
||||
resp := httptest.NewRecorder()
|
||||
router.ServeHTTP(resp, req)
|
||||
|
||||
tc.Expected(t, capturedRequest)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMiddlewareResponses(t *testing.T) {
|
||||
type testCase struct {
|
||||
Name string
|
||||
Method string
|
||||
@@ -30,159 +155,7 @@ func TestMiddleware(t *testing.T) {
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
Name: "chat handler",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/chat",
|
||||
TestPath: "/api/chat",
|
||||
Handler: ChatMiddleware,
|
||||
Endpoint: func(c *gin.Context) {
|
||||
var chatReq api.ChatRequest
|
||||
if err := c.ShouldBindJSON(&chatReq); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"})
|
||||
return
|
||||
}
|
||||
|
||||
userMessage := chatReq.Messages[0].Content
|
||||
var assistantMessage string
|
||||
|
||||
switch userMessage {
|
||||
case "Hello":
|
||||
assistantMessage = "Hello!"
|
||||
default:
|
||||
assistantMessage = "I'm not sure how to respond to that."
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, api.ChatResponse{
|
||||
Message: api.Message{
|
||||
Role: "assistant",
|
||||
Content: assistantMessage,
|
||||
},
|
||||
})
|
||||
},
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{{Role: "user", Content: "Hello"}},
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
},
|
||||
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
|
||||
assert.Equal(t, http.StatusOK, resp.Code)
|
||||
|
||||
var chatResp ChatCompletion
|
||||
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if chatResp.Object != "chat.completion" {
|
||||
t.Fatalf("expected chat.completion, got %s", chatResp.Object)
|
||||
}
|
||||
|
||||
if chatResp.Choices[0].Message.Content != "Hello!" {
|
||||
t.Fatalf("expected Hello!, got %s", chatResp.Choices[0].Message.Content)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "completions handler",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/generate",
|
||||
TestPath: "/api/generate",
|
||||
Handler: CompletionsMiddleware,
|
||||
Endpoint: func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, api.GenerateResponse{
|
||||
Response: "Hello!",
|
||||
})
|
||||
},
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
body := CompletionRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
},
|
||||
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
|
||||
assert.Equal(t, http.StatusOK, resp.Code)
|
||||
var completionResp Completion
|
||||
if err := json.NewDecoder(resp.Body).Decode(&completionResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if completionResp.Object != "text_completion" {
|
||||
t.Fatalf("expected text_completion, got %s", completionResp.Object)
|
||||
}
|
||||
|
||||
if completionResp.Choices[0].Text != "Hello!" {
|
||||
t.Fatalf("expected Hello!, got %s", completionResp.Choices[0].Text)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "completions handler with params",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/generate",
|
||||
TestPath: "/api/generate",
|
||||
Handler: CompletionsMiddleware,
|
||||
Endpoint: func(c *gin.Context) {
|
||||
var generateReq api.GenerateRequest
|
||||
if err := c.ShouldBindJSON(&generateReq); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"})
|
||||
return
|
||||
}
|
||||
|
||||
temperature := generateReq.Options["temperature"].(float64)
|
||||
var assistantMessage string
|
||||
|
||||
switch temperature {
|
||||
case 1.6:
|
||||
assistantMessage = "Received temperature of 1.6"
|
||||
default:
|
||||
assistantMessage = fmt.Sprintf("Received temperature of %f", temperature)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, api.GenerateResponse{
|
||||
Response: assistantMessage,
|
||||
})
|
||||
},
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
temp := float32(0.8)
|
||||
body := CompletionRequest{
|
||||
Model: "test-model",
|
||||
Prompt: "Hello",
|
||||
Temperature: &temp,
|
||||
}
|
||||
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
},
|
||||
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
|
||||
assert.Equal(t, http.StatusOK, resp.Code)
|
||||
var completionResp Completion
|
||||
if err := json.NewDecoder(resp.Body).Decode(&completionResp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if completionResp.Object != "text_completion" {
|
||||
t.Fatalf("expected text_completion, got %s", completionResp.Object)
|
||||
}
|
||||
|
||||
if completionResp.Choices[0].Text != "Received temperature of 1.6" {
|
||||
t.Fatalf("expected Received temperature of 1.6, got %s", completionResp.Choices[0].Text)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "completions handler with error",
|
||||
Name: "completions handler error forwarding",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/generate",
|
||||
TestPath: "/api/generate",
|
||||
|
||||
@@ -31,6 +31,10 @@ func NewSpinner(message string) *Spinner {
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *Spinner) SetMessage(message string) {
|
||||
s.message = message
|
||||
}
|
||||
|
||||
func (s *Spinner) String() string {
|
||||
var sb strings.Builder
|
||||
if len(s.message) > 0 {
|
||||
|
||||
@@ -32,6 +32,7 @@ import (
|
||||
"github.com/ollama/ollama/types/errtypes"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
var errCapabilityCompletion = errors.New("completion")
|
||||
@@ -422,11 +423,15 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
return err
|
||||
}
|
||||
|
||||
tensorCount := len(baseLayer.GGML.Tensors())
|
||||
ft := baseLayer.GGML.KV().FileType()
|
||||
if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
|
||||
return errors.New("quantization is only supported for F16 and F32 models")
|
||||
} else if want != ft {
|
||||
fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantization)})
|
||||
fn(api.ProgressResponse{
|
||||
Status: "quantizing model tensors",
|
||||
Quantize: "quant",
|
||||
})
|
||||
|
||||
blob, err := GetBlobsPath(baseLayer.Digest)
|
||||
if err != nil {
|
||||
@@ -440,7 +445,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
defer temp.Close()
|
||||
defer os.Remove(temp.Name())
|
||||
|
||||
if err := llm.Quantize(blob, temp.Name(), want); err != nil {
|
||||
if err := llm.Quantize(blob, temp.Name(), want, fn, tensorCount); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1064,11 +1069,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||
if anonymous {
|
||||
// no user is associated with the public key, and the request requires non-anonymous access
|
||||
pubKey, nestedErr := auth.GetPublicKey()
|
||||
localPubKey := strings.TrimSpace(string(ssh.MarshalAuthorizedKey(pubKey)))
|
||||
if nestedErr != nil {
|
||||
slog.Error(fmt.Sprintf("couldn't get public key: %v", nestedErr))
|
||||
return nil, errUnauthorized
|
||||
}
|
||||
return nil, &errtypes.UnknownOllamaKey{Key: pubKey}
|
||||
return nil, &errtypes.UnknownOllamaKey{Key: localPubKey}
|
||||
}
|
||||
// user is associated with the public key, but is not authorized to make the request
|
||||
return nil, errUnauthorized
|
||||
|
||||
224
server/routes.go
224
server/routes.go
@@ -4,12 +4,13 @@ import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"log/slog"
|
||||
"math"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
@@ -18,14 +19,15 @@ import (
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/gin-contrib/cors"
|
||||
"github.com/gin-gonic/gin"
|
||||
"golang.org/x/crypto/ssh"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/ollama/ollama/llm"
|
||||
@@ -248,152 +250,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||
streamResponse(c, ch)
|
||||
}
|
||||
|
||||
func (s *Server) EmbedHandler(c *gin.Context) {
|
||||
var req api.EmbedRequest
|
||||
err := c.ShouldBindJSON(&req)
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
|
||||
return
|
||||
case err != nil:
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if req.Truncate == nil {
|
||||
truncate := true
|
||||
req.Truncate = &truncate
|
||||
}
|
||||
|
||||
reqEmbed := []string{}
|
||||
|
||||
switch embeddings := req.Input.(type) {
|
||||
case string:
|
||||
if embeddings == "" {
|
||||
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
|
||||
return
|
||||
}
|
||||
reqEmbed = []string{embeddings}
|
||||
case []any:
|
||||
if len(embeddings) == 0 {
|
||||
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
|
||||
return
|
||||
}
|
||||
|
||||
for _, v := range embeddings {
|
||||
if _, ok := v.(string); !ok {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
|
||||
return
|
||||
}
|
||||
reqEmbed = append(reqEmbed, v.(string))
|
||||
}
|
||||
default:
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
|
||||
return
|
||||
}
|
||||
|
||||
r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
|
||||
if err != nil {
|
||||
handleScheduleError(c, req.Model, err)
|
||||
return
|
||||
}
|
||||
|
||||
kvData, err := getKVData(m.ModelPath, false)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
reqEmbedArray := make([]string, len(reqEmbed))
|
||||
errCh := make(chan error, 1)
|
||||
successCh := make(chan bool, 1)
|
||||
sem := make(chan struct{}, 2)
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
for i, s := range reqEmbed {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(i int, s string) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
tokens, err := r.Tokenize(c.Request.Context(), s)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
|
||||
ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
|
||||
if len(tokens) > ctxLen {
|
||||
if *req.Truncate {
|
||||
tokens = tokens[:ctxLen]
|
||||
s, err = r.Detokenize(c.Request.Context(), tokens)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
} else {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
}
|
||||
mu.Lock()
|
||||
reqEmbedArray[i] = s
|
||||
mu.Unlock()
|
||||
}(i, s)
|
||||
}
|
||||
go func() {
|
||||
wg.Wait()
|
||||
successCh <- true
|
||||
close(errCh)
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-errCh:
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
return
|
||||
case success := <-successCh:
|
||||
if !success {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to process all embeddings"})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
embeddings, err := r.Embed(c.Request.Context(), reqEmbedArray)
|
||||
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
||||
return
|
||||
}
|
||||
|
||||
for i, e := range embeddings {
|
||||
embeddings[i] = normalize(e)
|
||||
}
|
||||
|
||||
resp := api.EmbedResponse{
|
||||
Model: req.Model,
|
||||
Embeddings: embeddings,
|
||||
}
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func normalize(vec []float32) []float32 {
|
||||
var sum float32
|
||||
for _, v := range vec {
|
||||
sum += v * v
|
||||
}
|
||||
|
||||
norm := float32(0.0)
|
||||
if sum > 0 {
|
||||
norm = float32(1.0 / math.Sqrt(float64(sum)))
|
||||
}
|
||||
|
||||
for i := range vec {
|
||||
vec[i] *= norm
|
||||
}
|
||||
return vec
|
||||
}
|
||||
|
||||
func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
||||
var req api.EmbeddingRequest
|
||||
if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
|
||||
@@ -416,24 +272,14 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
embedding, err := r.Embed(c.Request.Context(), []string{req.Prompt})
|
||||
|
||||
embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
|
||||
if err != nil {
|
||||
slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
|
||||
return
|
||||
}
|
||||
|
||||
embedding64 := make([]float64, len(embedding[0]))
|
||||
|
||||
for i, v := range embedding[0] {
|
||||
embedding64[i] = float64(v)
|
||||
}
|
||||
|
||||
resp := api.EmbeddingResponse{
|
||||
Embedding: embedding64,
|
||||
}
|
||||
c.JSON(http.StatusOK, resp)
|
||||
c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: embedding})
|
||||
}
|
||||
|
||||
func (s *Server) PullModelHandler(c *gin.Context) {
|
||||
@@ -928,7 +774,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
_, err = os.Stat(path)
|
||||
switch {
|
||||
case errors.Is(err, os.ErrNotExist):
|
||||
@@ -941,6 +786,12 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if c.GetHeader("X-Redirect-Create") == "1" && s.IsLocal(c) {
|
||||
c.Header("LocalLocation", path)
|
||||
c.Status(http.StatusTemporaryRedirect)
|
||||
return
|
||||
}
|
||||
|
||||
layer, err := NewLayer(c.Request.Body, "")
|
||||
if err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
@@ -955,6 +806,54 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
|
||||
c.Status(http.StatusCreated)
|
||||
}
|
||||
|
||||
func (s *Server) IsLocal(c *gin.Context) bool {
|
||||
if authz := c.GetHeader("Authorization"); authz != "" {
|
||||
parts := strings.Split(authz, ":")
|
||||
if len(parts) != 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
clientPublicKey, _, _, _, err := ssh.ParseAuthorizedKey([]byte(fmt.Sprintf("ssh-ed25519 %s", parts[0])))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// partialRequestData is formatted as http.Method,http.requestURI,timestamp,nonce
|
||||
requestData, err := base64.StdEncoding.DecodeString(parts[1])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
partialRequestDataParts := strings.Split(string(requestData), ",")
|
||||
if len(partialRequestDataParts) != 3 {
|
||||
return false
|
||||
}
|
||||
|
||||
signature, err := base64.StdEncoding.DecodeString(parts[2])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if err := clientPublicKey.Verify(requestData, &ssh.Signature{Format: clientPublicKey.Type(), Blob: signature}); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
serverPublicKey, err := auth.GetPublicKey()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
if bytes.Equal(serverPublicKey.Marshal(), clientPublicKey.Marshal()) {
|
||||
return true
|
||||
}
|
||||
|
||||
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "unauthorized"})
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isLocalIP(ip netip.Addr) bool {
|
||||
if interfaces, err := net.Interfaces(); err == nil {
|
||||
for _, iface := range interfaces {
|
||||
@@ -1059,8 +958,7 @@ func (s *Server) GenerateRoutes() http.Handler {
|
||||
r.POST("/api/pull", s.PullModelHandler)
|
||||
r.POST("/api/generate", s.GenerateHandler)
|
||||
r.POST("/api/chat", s.ChatHandler)
|
||||
r.POST("/api/embed", s.EmbedHandler)
|
||||
r.POST("/api/embeddings", s.EmbeddingsHandler) // legacy
|
||||
r.POST("/api/embeddings", s.EmbeddingsHandler)
|
||||
r.POST("/api/create", s.CreateModelHandler)
|
||||
r.POST("/api/push", s.PushModelHandler)
|
||||
r.POST("/api/copy", s.CopyModelHandler)
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
@@ -273,73 +272,6 @@ func Test_Routes(t *testing.T) {
|
||||
assert.Equal(t, "library", retrieveResp.OwnedBy)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Embed Handler Empty Input",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/embed",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
embedReq := api.EmbedRequest{
|
||||
Model: "t-bone",
|
||||
Input: "",
|
||||
}
|
||||
jsonData, err := json.Marshal(embedReq)
|
||||
require.NoError(t, err)
|
||||
req.Body = io.NopCloser(bytes.NewReader(jsonData))
|
||||
},
|
||||
Expected: func(t *testing.T, resp *http.Response) {
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if contentType != "application/json; charset=utf-8" {
|
||||
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var embedResp api.EmbedResponse
|
||||
err = json.Unmarshal(body, &embedResp)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if embedResp.Model != "t-bone" {
|
||||
t.Fatalf("expected model t-bone, got %s", embedResp.Model)
|
||||
}
|
||||
|
||||
if embedResp.Embeddings != nil {
|
||||
t.Fatalf("expected embeddings to be nil, got %v", embedResp.Embeddings)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Embed Handler Invalid Input",
|
||||
Method: http.MethodPost,
|
||||
Path: "/api/embed",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
embedReq := api.EmbedRequest{
|
||||
Model: "t-bone",
|
||||
Input: 2,
|
||||
}
|
||||
jsonData, err := json.Marshal(embedReq)
|
||||
require.NoError(t, err)
|
||||
req.Body = io.NopCloser(bytes.NewReader(jsonData))
|
||||
},
|
||||
Expected: func(t *testing.T, resp *http.Response) {
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
if contentType != "application/json; charset=utf-8" {
|
||||
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
|
||||
}
|
||||
_, err := io.ReadAll(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, got %d", resp.StatusCode)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
||||
@@ -488,38 +420,3 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal("Expected projector architecture to be 'clip', but got", resp.ProjectorInfo["general.architecture"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalize(t *testing.T) {
|
||||
type testCase struct {
|
||||
input []float32
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{input: []float32{1}},
|
||||
{input: []float32{0, 1, 2, 3}},
|
||||
{input: []float32{0.1, 0.2, 0.3}},
|
||||
{input: []float32{-0.1, 0.2, 0.3, -0.4}},
|
||||
{input: []float32{0, 0, 0}},
|
||||
}
|
||||
|
||||
isNormalized := func(vec []float32) (res bool) {
|
||||
sum := 0.0
|
||||
for _, v := range vec {
|
||||
sum += float64(v * v)
|
||||
}
|
||||
if math.Abs(sum-1) > 1e-6 {
|
||||
return sum == 0
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run("", func(t *testing.T) {
|
||||
normalized := normalize(tc.input)
|
||||
if !isNormalized(normalized) {
|
||||
t.Errorf("Vector %v is not normalized", tc.input)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -642,8 +642,8 @@ type mockLlm struct {
|
||||
pingResp error
|
||||
waitResp error
|
||||
completionResp error
|
||||
embedResp [][]float32
|
||||
embedRespErr error
|
||||
embeddingResp []float64
|
||||
embeddingRespErr error
|
||||
tokenizeResp []int
|
||||
tokenizeRespErr error
|
||||
detokenizeResp string
|
||||
@@ -660,8 +660,8 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
|
||||
func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
|
||||
return s.completionResp
|
||||
}
|
||||
func (s *mockLlm) Embed(ctx context.Context, input []string) ([][]float32, error) {
|
||||
return s.embedResp, s.embedRespErr
|
||||
func (s *mockLlm) Embedding(ctx context.Context, prompt string) ([]float64, error) {
|
||||
return s.embeddingResp, s.embeddingRespErr
|
||||
}
|
||||
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
|
||||
return s.tokenizeResp, s.tokenizeRespErr
|
||||
|
||||
Reference in New Issue
Block a user