mirror of https://github.com/ollama/ollama.git

Compare commits: mxyng/extr... → format-con... (28 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 23481167a7 | |
| | 07b4074e7b | |
| | 61dda6a5e0 | |
| | e1f9ced568 | |
| | 9795b43d93 | |
| | 0980d5c7e3 | |
| | 0dae34b6a7 | |
| | 83c6be1666 | |
| | 1adfa67589 | |
| | 790d24eb7b | |
| | 7de300856b | |
| | 213ffdb548 | |
| | d42d88386a | |
| | 154f24af91 | |
| | a1ecdd36d5 | |
| | d18282bfda | |
| | 9ae76ba8c9 | |
| | 2bc06565c7 | |
| | d1c2558f7e | |
| | 7b5aefb427 | |
| | 06ef90c051 | |
| | 7efbc84320 | |
| | e9f6df7dca | |
| | 7fa6e51686 | |
| | 8dc68417e7 | |
| | 681f3c4c42 | |
| | 59a705525c | |
| | 5d3f314b0b | |
.gitignore

```diff
@@ -1,8 +1,4 @@
-build
-llama/build
 .venv
 .vscode
 ollama
-app
-web
-.env
+llm/llama.cpp/ggml
```
Dockerfile (18 lines changed)

```diff
@@ -1,15 +1,21 @@
-FROM golang:1.20
+FROM golang:alpine
 
 WORKDIR /go/src/github.com/jmorganca/ollama
+RUN apk add --no-cache git build-base cmake
 
 COPY . .
-RUN CGO_ENABLED=1 go build -ldflags '-linkmode external -extldflags "-static"' .
+RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' .
 
 FROM alpine
-COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+RUN apk add --no-cache libstdc++
 
 ARG USER=ollama
 ARG GROUP=ollama
-RUN addgroup -g 1000 $GROUP && adduser -u 1000 -DG $GROUP $USER
+RUN addgroup $GROUP && adduser -D -G $GROUP $USER
 
+COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
+
 USER $USER:$GROUP
 ENTRYPOINT ["/bin/ollama"]
+ENV OLLAMA_HOST 0.0.0.0
 CMD ["serve"]
```
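The updated multi-stage Dockerfile compiles inside `golang:alpine` and runs the binary as the unprivileged `ollama` user. A sketch of building and running the resulting image (the image tag is an arbitrary choice):

```
docker build -t ollama .
docker run -d -p 11434:11434 ollama
```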
README.md

````diff
@@ -165,10 +165,11 @@ Ollama bundles model weights, configurations, and data into a single package, de
 
 ## Building
 
-Install `cmake`:
+Install `cmake` and `go`:
 
 ```
 brew install cmake
+brew install go
 ```
 
 Then generate dependencies and build:
````
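The generate-and-build step the README refers to mirrors the Dockerfile's `RUN go generate ./... && go build ...` instruction above; a sketch of the commands:

```
go generate ./...
go build .
```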
api/client.go

```diff
@@ -255,6 +255,14 @@ func (c *Client) Delete(ctx context.Context, req *DeleteRequest) error {
 	return nil
 }
 
+func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error) {
+	var resp ShowResponse
+	if err := c.do(ctx, http.MethodPost, "/api/show", req, &resp); err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
 		return err
```
api/types.go (12 lines changed)

```diff
@@ -61,6 +61,18 @@ type DeleteRequest struct {
 	Name string `json:"name"`
 }
 
+type ShowRequest struct {
+	Name string `json:"name"`
+}
+
+type ShowResponse struct {
+	License    string `json:"license,omitempty"`
+	Modelfile  string `json:"modelfile,omitempty"`
+	Parameters string `json:"parameters,omitempty"`
+	Template   string `json:"template,omitempty"`
+	System     string `json:"system,omitempty"`
+}
+
 type CopyRequest struct {
 	Source      string `json:"source"`
 	Destination string `json:"destination"`
```
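Together with the new `Client.Show` method above, these types expose model details programmatically. A minimal sketch of calling it (the model name `llama2` is an assumed example, not something this diff pins down):

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// FromEnv builds a client from OLLAMA_HOST, as the CLI handlers do.
	client, err := api.FromEnv()
	if err != nil {
		log.Fatal(err)
	}

	// POSTs {"name": "llama2"} to /api/show and decodes the ShowResponse.
	resp, err := client.Show(context.Background(), &api.ShowRequest{Name: "llama2"})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(resp.Template)
}
```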
cmd/cmd.go (149 lines changed)
```diff
@@ -230,6 +230,84 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }
 
+func ShowHandler(cmd *cobra.Command, args []string) error {
+	client, err := api.FromEnv()
+	if err != nil {
+		return err
+	}
+
+	if len(args) != 1 {
+		return errors.New("missing model name")
+	}
+
+	license, errLicense := cmd.Flags().GetBool("license")
+	modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
+	parameters, errParams := cmd.Flags().GetBool("parameters")
+	system, errSystem := cmd.Flags().GetBool("system")
+	template, errTemplate := cmd.Flags().GetBool("template")
+
+	for _, boolErr := range []error{errLicense, errModelfile, errParams, errSystem, errTemplate} {
+		if boolErr != nil {
+			return errors.New("error retrieving flags")
+		}
+	}
+
+	flagsSet := 0
+	showType := ""
+
+	if license {
+		flagsSet++
+		showType = "license"
+	}
+
+	if modelfile {
+		flagsSet++
+		showType = "modelfile"
+	}
+
+	if parameters {
+		flagsSet++
+		showType = "parameters"
+	}
+
+	if system {
+		flagsSet++
+		showType = "system"
+	}
+
+	if template {
+		flagsSet++
+		showType = "template"
+	}
+
+	if flagsSet > 1 {
+		return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
+	} else if flagsSet == 0 {
+		return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
+	}
+
+	req := api.ShowRequest{Name: args[0]}
+	resp, err := client.Show(context.Background(), &req)
+	if err != nil {
+		return err
+	}
+
+	switch showType {
+	case "license":
+		fmt.Println(resp.License)
+	case "modelfile":
+		fmt.Println(resp.Modelfile)
+	case "parameters":
+		fmt.Println(resp.Parameters)
+	case "system":
+		fmt.Println(resp.System)
+	case "template":
+		fmt.Println(resp.Template)
+	}
+
+	return nil
+}
+
 func CopyHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.FromEnv()
 	if err != nil {
```
```diff
@@ -377,20 +455,6 @@ func generate(cmd *cobra.Command, model, prompt string) error {
 	return nil
 }
 
-func showLayer(l *server.Layer) {
-	filename, err := server.GetBlobsPath(l.Digest)
-	if err != nil {
-		fmt.Println("Couldn't get layer's path")
-		return
-	}
-	bts, err := os.ReadFile(filename)
-	if err != nil {
-		fmt.Println("Couldn't read layer")
-		return
-	}
-	fmt.Println(string(bts))
-}
-
 func generateInteractive(cmd *cobra.Command, model string) error {
 	home, err := os.UserHomeDir()
 	if err != nil {
```
```diff
@@ -413,6 +477,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
 		),
 		readline.PcItem("/show",
 			readline.PcItem("license"),
+			readline.PcItem("modelfile"),
+			readline.PcItem("parameters"),
 			readline.PcItem("system"),
 			readline.PcItem("template"),
 		),
```
```diff
@@ -522,42 +588,28 @@ func generateInteractive(cmd *cobra.Command, model string) error {
 			case strings.HasPrefix(line, "/show"):
 				args := strings.Fields(line)
 				if len(args) > 1 {
-					mp := server.ParseModelPath(model)
+					resp, err := server.GetModelInfo(model)
 					if err != nil {
-						return err
+						fmt.Println("error: couldn't get model")
+						continue
 					}
-
-					manifest, _, err := server.GetManifest(mp)
-					if err != nil {
-						fmt.Println("error: couldn't get a manifest for this model")
-						continue
-					}
 					switch args[1] {
 					case "license":
-						for _, l := range manifest.Layers {
-							if l.MediaType == "application/vnd.ollama.image.license" {
-								showLayer(l)
-							}
-						}
-						continue
+						fmt.Println(resp.License)
+					case "modelfile":
+						fmt.Println(resp.Modelfile)
+					case "parameters":
+						fmt.Println(resp.Parameters)
 					case "system":
-						for _, l := range manifest.Layers {
-							if l.MediaType == "application/vnd.ollama.image.system" {
-								showLayer(l)
-							}
-						}
-						continue
+						fmt.Println(resp.System)
 					case "template":
-						for _, l := range manifest.Layers {
-							if l.MediaType == "application/vnd.ollama.image.template" {
-								showLayer(l)
-							}
-						}
-						continue
+						fmt.Println(resp.Template)
 					default:
-						usage()
-						continue
+						fmt.Println("error: unknown command")
 					}
+
+					continue
 				} else {
 					usage()
 					continue
```
```diff
@@ -749,6 +801,20 @@ func NewCLI() *cobra.Command {
 
 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
 
+	showCmd := &cobra.Command{
+		Use:     "show MODEL",
+		Short:   "Show information for a model",
+		Args:    cobra.MinimumNArgs(1),
+		PreRunE: checkServerHeartbeat,
+		RunE:    ShowHandler,
+	}
+
+	showCmd.Flags().Bool("license", false, "Show license of a model")
+	showCmd.Flags().Bool("modelfile", false, "Show Modelfile of a model")
+	showCmd.Flags().Bool("parameters", false, "Show parameters of a model")
+	showCmd.Flags().Bool("template", false, "Show template of a model")
+	showCmd.Flags().Bool("system", false, "Show system prompt of a model")
+
 	runCmd := &cobra.Command{
 		Use:   "run MODEL [PROMPT]",
 		Short: "Run a model",
```
```diff
@@ -814,6 +880,7 @@ func NewCLI() *cobra.Command {
 	rootCmd.AddCommand(
 		serveCmd,
 		createCmd,
+		showCmd,
 		runCmd,
 		pullCmd,
 		pushCmd,
```
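With `showCmd` registered, exactly one of the five flags must be supplied; a sketch of an invocation (the model name is an assumed example):

```
ollama show --modelfile llama2
```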
docs/api.md

````diff
@@ -238,6 +238,10 @@ Generate embeddings from a model
 - `model`: name of model to generate embeddings from
 - `prompt`: text to generate embeddings for
 
+Advanced parameters:
+
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+
 ### Request
 
 ```
````
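A request using the new `options` field might look like the following sketch (model name and values are placeholders):

```
curl -X POST http://localhost:11434/api/embeddings -d '{
  "model": "llama2",
  "prompt": "Here is an article about llamas...",
  "options": {
    "temperature": 0.8
  }
}'
```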
format/openssh.go

```diff
@@ -10,15 +10,11 @@ package format
 
 import (
 	"crypto"
-	"crypto/ecdsa"
 	"crypto/ed25519"
-	"crypto/elliptic"
 	"crypto/rand"
-	"crypto/rsa"
 	"encoding/binary"
 	"encoding/pem"
 	"fmt"
-	"math/big"
 
 	"golang.org/x/crypto/ssh"
 )
@@ -41,25 +37,6 @@ type openSSHPrivateKey struct {
 	Rest []byte `ssh:"rest"`
 }
 
-type openSSHRSAPrivateKey struct {
-	N       *big.Int
-	E       *big.Int
-	D       *big.Int
-	Iqmp    *big.Int
-	P       *big.Int
-	Q       *big.Int
-	Comment string
-	Pad     []byte `ssh:"rest"`
-}
-
-type openSSHECDSAPrivateKey struct {
-	Curve   string
-	Pub     []byte
-	D       *big.Int
-	Comment string
-	Pad     []byte `ssh:"rest"`
-}
-
 type openSSHEd25519PrivateKey struct {
 	Pub  []byte
 	Priv []byte
@@ -85,64 +62,6 @@ func OpenSSHPrivateKey(key crypto.PrivateKey, comment string) (*pem.Block, error
 	}
 
 	switch k := key.(type) {
-	case *rsa.PrivateKey:
-		e := new(big.Int).SetInt64(int64(k.E))
-
-		key := openSSHRSAPrivateKey{
-			N:       k.N,
-			E:       e,
-			D:       k.D,
-			Iqmp:    k.Precomputed.Qinv,
-			P:       k.Primes[0],
-			Q:       k.Primes[1],
-			Comment: comment,
-		}
-
-		pk1.Keytype = ssh.KeyAlgoRSA
-		pk1.Rest = ssh.Marshal(key)
-
-		w.PubKey = ssh.Marshal(struct {
-			KeyType string
-			E       *big.Int
-			N       *big.Int
-		}{
-			ssh.KeyAlgoRSA, e, k.N,
-		})
-	case *ecdsa.PrivateKey:
-		var curve, keytype string
-		switch name := k.Curve.Params().Name; name {
-		case "P-256":
-			curve = "nistp256"
-			keytype = ssh.KeyAlgoECDSA256
-		case "P-384":
-			curve = "nistp384"
-			keytype = ssh.KeyAlgoECDSA384
-		case "P-521":
-			curve = "nistp521"
-			keytype = ssh.KeyAlgoECDSA521
-		default:
-			return nil, fmt.Errorf("ssh: unknown curve %q", name)
-		}
-
-		pub := elliptic.Marshal(k.Curve, k.X, k.Y)
-
-		key := openSSHECDSAPrivateKey{
-			Curve:   curve,
-			Pub:     pub,
-			D:       k.D,
-			Comment: comment,
-		}
-
-		pk1.Keytype = keytype
-		pk1.Rest = ssh.Marshal(key)
-
-		w.PubKey = ssh.Marshal(struct {
-			KeyType string
-			Curve   string
-			Pub     []byte
-		}{
-			keytype, curve, pub,
-		})
 	case ed25519.PrivateKey:
 		pub, priv := k[32:], k
 		key := openSSHEd25519PrivateKey{
```
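After this change the formatter only handles ed25519 keys. A minimal sketch of using it (the import path assumes the file lives in this repo's `format` package; the comment string is arbitrary):

```go
package main

import (
	"crypto/ed25519"
	"crypto/rand"
	"encoding/pem"
	"log"
	"os"

	"github.com/jmorganca/ollama/format"
)

func main() {
	// Only ed25519 keys are supported after this diff.
	_, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		log.Fatal(err)
	}

	// Produce an OpenSSH-format PEM block for the private key.
	block, err := format.OpenSSHPrivateKey(priv, "ollama")
	if err != nil {
		log.Fatal(err)
	}

	if err := pem.Encode(os.Stdout, block); err != nil {
		log.Fatal(err)
	}
}
```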
go.mod (1 line changed)

```diff
@@ -39,6 +39,7 @@ require (
 	github.com/ugorji/go/codec v1.2.11 // indirect
 	golang.org/x/arch v0.3.0 // indirect
 	golang.org/x/crypto v0.10.0
+	golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
 	golang.org/x/net v0.10.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
 	golang.org/x/term v0.10.0
```
go.sum (2 lines changed)

```diff
@@ -121,6 +121,8 @@ golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5y
 golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
 golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
 golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug=
+golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
+golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
```
llm/llama.go

```diff
@@ -45,6 +45,7 @@ func osPath(llamaPath string) string {
 	if runtime.GOOS == "windows" {
 		return path.Join(llamaPath, "Release")
 	}
+
 	return llamaPath
 }
```
```diff
@@ -68,7 +69,9 @@ func initGGML() {
 	case "windows":
 		files = []string{"server.exe"}
 	case "darwin":
-		files = append(files, "ggml-metal.metal")
+		if llamaPath == osPath(ggmlGPU) {
+			files = append(files, "ggml-metal.metal")
+		}
 	}
 
 	for _, f := range files {
```
```diff
@@ -286,8 +289,8 @@ func newLlama(model string, adapters []string, runner ModelRunner, opts api.Opti
 		runner.Path,
 		append(params, "--port", strconv.Itoa(port))...,
 	)
-	var stderr bytes.Buffer
-	cmd.Stderr = &stderr
+	cmd.Stdout = os.Stderr
+	cmd.Stderr = os.Stderr
 
 	llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
```
```diff
@@ -353,11 +356,6 @@ func (llm *llama) SetOptions(opts api.Options) {
 	llm.Options = opts
 }
 
-type Prediction struct {
-	Content string `json:"content"`
-	Stop    bool   `json:"stop"`
-}
-
 type GenerationSettings struct {
 	FrequencyPenalty float64 `json:"frequency_penalty"`
 	IgnoreEOS        bool    `json:"ignore_eos"`
```
```diff
@@ -385,31 +383,19 @@ type GenerationSettings struct {
 }
 
 type Timings struct {
-	PredictedMS         float64 `json:"predicted_ms"`
-	PredictedN          int     `json:"predicted_n"`
-	PredictedPerSecond  float64 `json:"predicted_per_second"`
-	PredictedPerTokenMS float64 `json:"predicted_per_token_ms"`
-	PromptMS            float64 `json:"prompt_ms"`
-	PromptN             int     `json:"prompt_n"`
-	PromptPerSecond     float64 `json:"prompt_per_second"`
-	PromptPerTokenMS    float64 `json:"prompt_per_token_ms"`
+	PredictedN  int     `json:"predicted_n"`
+	PredictedMS float64 `json:"predicted_ms"`
+	PromptN     int     `json:"prompt_n"`
+	PromptMS    float64 `json:"prompt_ms"`
 }
 
-type PredictComplete struct {
-	Content            string             `json:"content"`
-	GenerationSettings GenerationSettings `json:"generation_settings"`
-	Model              string             `json:"model"`
-	Prompt             string             `json:"prompt"`
-	Stop               bool               `json:"stop"`
-	StoppedEOS         bool               `json:"stopped_eos"`
-	StoppedLimit       bool               `json:"stopped_limit"`
-	StoppedWord        bool               `json:"stopped_word"`
-	StoppingWord       string             `json:"stopping_word"`
-	Timings            Timings            `json:"timings"`
-	TokensCached       int                `json:"tokens_cached"`
-	TokensEvaluated    int                `json:"tokens_evaluated"`
-	TokensPredicted    int                `json:"tokens_predicted"`
-	Truncated          bool               `json:"truncated"`
-}
+type Prediction struct {
+	Content string `json:"content"`
+	Model   string `json:"model"`
+	Prompt  string `json:"prompt"`
+	Stop    bool   `json:"stop"`
+
+	Timings `json:"timings"`
+}
 
 type PredictRequest struct {
```
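The consolidated `Prediction` type replaces the old `PredictComplete`/`Prediction` pair: intermediate events carry content, and the final event sets `stop` along with timings. A self-contained sketch of decoding such an event (the payload values are invented for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Mirrors the new types from llm/llama.go: the embedded Timings field
// has a JSON tag, so it decodes from the nested "timings" object while
// its fields are still promoted on the Go side (p.PredictedN works).
type Timings struct {
	PredictedN  int     `json:"predicted_n"`
	PredictedMS float64 `json:"predicted_ms"`
	PromptN     int     `json:"prompt_n"`
	PromptMS    float64 `json:"prompt_ms"`
}

type Prediction struct {
	Content string `json:"content"`
	Stop    bool   `json:"stop"`

	Timings `json:"timings"`
}

func main() {
	// A hypothetical final event from llama.cpp's /completion stream.
	evt := `{"content":"","stop":true,"timings":{"predicted_n":128,"predicted_ms":2500.0,"prompt_n":12,"prompt_ms":110.0}}`

	var p Prediction
	if err := json.Unmarshal([]byte(evt), &p); err != nil {
		panic(err)
	}
	fmt.Println(p.Stop, p.PredictedN, p.PromptMS)
}
```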
```diff
@@ -437,15 +423,19 @@ type PredictRequest struct {
 	Stop             []string `json:"stop,omitempty"`
 }
 
-func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string, fn func(api.GenerateResponse)) error {
-	// we need to find the trimmed prompt context before predicting so that we can return it to the client
-	trimmedPrompt, err := llm.marshalPrompt(ctx, predictCtx, prompt)
+func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
+	prevConvo, err := llm.Decode(ctx, prevContext)
 	if err != nil {
-		return fmt.Errorf("marshaling prompt: %v", err)
+		return err
 	}
 
+	var nextContext strings.Builder
+	nextContext.WriteString(prevConvo)
+	nextContext.WriteString(prompt)
+
 	endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
 	predReq := PredictRequest{
-		Prompt:   trimmedPrompt,
+		Prompt:   nextContext.String(),
 		Stream:   true,
 		NPredict: llm.NumPredict,
 		NKeep:    llm.NumKeep,
```
```diff
@@ -491,7 +481,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
 	}
 
 	scanner := bufio.NewScanner(resp.Body)
-	genCtx := trimmedPrompt // start with the trimmed prompt
 	for scanner.Scan() {
 		select {
 		case <-ctx.Done():
```
```diff
@@ -506,34 +495,33 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
 			// Read data from the server-side event stream
 			if strings.HasPrefix(line, "data: ") {
 				evt := line[6:]
-				var complete PredictComplete
-				if err := json.Unmarshal([]byte(evt), &complete); err != nil {
-					return fmt.Errorf("error unmarshaling llm complete response: %v", err)
+				var p Prediction
+				if err := json.Unmarshal([]byte(evt), &p); err != nil {
+					return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
 				}
 
-				if complete.Timings.PredictedMS > 0 {
-					genCtx += complete.Content
-					embd, err := llm.Encode(ctx, genCtx)
+				if p.Content != "" {
+					fn(api.GenerateResponse{Response: p.Content})
+					nextContext.WriteString(p.Content)
+				}
+
+				if p.Stop {
+					embd, err := llm.Encode(ctx, nextContext.String())
 					if err != nil {
 						return fmt.Errorf("encoding context: %v", err)
 					}
 
 					fn(api.GenerateResponse{
 						Done:    true,
 						Context: embd,
-						PromptEvalCount:    int(complete.Timings.PromptN),
-						PromptEvalDuration: parseDurationMs(float64(complete.Timings.PromptMS)),
-						EvalCount:          int(complete.Timings.PredictedN),
-						EvalDuration:       parseDurationMs(float64(complete.Timings.PredictedMS)),
+						PromptEvalCount:    p.PromptN,
+						PromptEvalDuration: parseDurationMs(p.PromptMS),
+						EvalCount:          p.PredictedN,
+						EvalDuration:       parseDurationMs(p.PredictedMS),
 					})
 
 					return nil
 				}
-
-				var pred Prediction
-				if err := json.Unmarshal([]byte(evt), &pred); err != nil {
-					return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
-				}
-				genCtx += pred.Content
-				fn(api.GenerateResponse{Response: pred.Content})
 			}
 		}
 	}
```
```diff
@@ -545,34 +533,6 @@ func (llm *llama) Predict(ctx context.Context, predictCtx []int, prompt string,
 	return nil
 }
 
-func (llm *llama) marshalPrompt(ctx context.Context, pCtx []int, prompt string) (string, error) {
-	pEncode, err := llm.Encode(ctx, prompt)
-	if err != nil {
-		return "", fmt.Errorf("encoding prompt context: %w", err)
-	}
-	tokens := append(pCtx, pEncode...)
-	if llm.NumKeep < 0 {
-		llm.NumKeep = len(tokens)
-	}
-
-	// min(llm.NumCtx - 4, llm.NumKeep)
-	if llm.NumCtx-4 < llm.NumKeep {
-		llm.NumKeep = llm.NumCtx - 4
-	}
-
-	if len(tokens) >= llm.NumCtx {
-		// truncate input
-		numLeft := (llm.NumCtx - llm.NumKeep) / 2
-		truncated := tokens[:llm.NumKeep]
-		erasedBlocks := (len(tokens) - llm.NumKeep - numLeft - 1) / numLeft
-		truncated = append(truncated, tokens[llm.NumKeep+erasedBlocks*numLeft:]...)
-		tokens = truncated
-		log.Printf("input truncated: num_ctx=%d num_keep=%d num_left=%d num_tokens=%d", llm.NumCtx, llm.NumKeep, numLeft, len(truncated))
-	}
-
-	return llm.Decode(ctx, tokens)
-}
-
 type TokenizeRequest struct {
 	Content string `json:"content"`
 }
```
llm/llama.cpp/generate.go

```diff
@@ -1,8 +1,13 @@
+//go:build !darwin
+// +build !darwin
+
 package llm
 
 //go:generate git submodule init
 //go:generate git submodule update --force ggml
 //go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
 //go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
-//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
+//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
 //go:generate cmake --build ggml/build/cpu --target server --config Release
```
(deleted file)

```diff
@@ -1,11 +0,0 @@
-//go:build darwin
-// +build darwin
-
-package llm
-
-//go:generate git submodule init
-//go:generate git submodule update --force ggml
-//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
-//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
-//go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
-//go:generate cmake --build ggml/build/gpu --target server --config Release
```
llm/llama.cpp/generate_darwin_amd64.go (new file, 10 lines)

```diff
@@ -0,0 +1,10 @@
+package llm
+
+//go:generate git submodule init
+//go:generate git submodule update --force ggml
+//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
+//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
+//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake --build ggml/build/cpu --target server --config Release
```
llm/llama.cpp/generate_darwin_arm64.go (new file, 10 lines)

```diff
@@ -0,0 +1,10 @@
+package llm
+
+//go:generate git submodule init
+//go:generate git submodule update --force ggml
+//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
+//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
+//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
+//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
+//go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
+//go:generate cmake --build ggml/build/gpu --target server --config Release
```
(new file, 32 lines)

```diff
@@ -0,0 +1,32 @@
+From 8c0ea847ac1460bca534d92266e3471cb31471be Mon Sep 17 00:00:00 2001
+From: Bruce MacDonald <brucewmacdonald@gmail.com>
+Date: Tue, 5 Sep 2023 16:05:08 -0400
+Subject: [PATCH] metal: add missing barriers for mul-mat #2699
+
+---
+ ggml-metal.metal | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/ggml-metal.metal b/ggml-metal.metal
+index 3f31252..ce3541f 100644
+--- a/ggml-metal.metal
++++ b/ggml-metal.metal
+@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
+     //load data and store to threadgroup memory
+     half4x4 temp_a;
+     dequantize_func(x, il, temp_a);
++    threadgroup_barrier(mem_flags::mem_threadgroup);
+ #pragma unroll(16)
+     for (int i = 0; i < 16; i++) {
+         *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
+@@ -1895,6 +1896,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
+     }
+ } else {
+     // block is smaller than 64x32, we should avoid writing data outside of the matrix
++    threadgroup_barrier(mem_flags::mem_threadgroup);
+     threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+                                   + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
+     for (int i = 0; i < 8; i++) {
+--
+2.39.2 (Apple Git-143)
+
```
llm/ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch (new file, 30 lines)

```diff
@@ -0,0 +1,30 @@
+From dadbed99e65252d79f81101a392d0d6497b86caa Mon Sep 17 00:00:00 2001
+From: Shouzheng Liu <lshzh.hi@gmail.com>
+Date: Mon, 21 Aug 2023 06:59:29 -0400
+Subject: [PATCH] metal : fix synchronization in new matrix multiplication
+ kernel (#2686)
+
+---
+ ggml-metal.metal | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/ggml-metal.metal b/ggml-metal.metal
+index 3f31252..88d48f6 100644
+--- a/ggml-metal.metal
++++ b/ggml-metal.metal
+@@ -1898,10 +1898,11 @@ kernel void kernel_mul_mm(device const uchar * src0,
+     threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+                                   + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
+     for (int i = 0; i < 8; i++) {
++        threadgroup_barrier(mem_flags::mem_device);
+         simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
+     }
+
+-    threadgroup_barrier(mem_flags::mem_threadgroup);
++    threadgroup_barrier(mem_flags::mem_device);
+     device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
+     if (sgitg==0) {
+         for (int i = 0; i < n_rows; i++) {
+--
+2.41.0
+
```
llm/ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch (new file, 41 lines)

```diff
@@ -0,0 +1,41 @@
+From 14b1d7e6f720dee41ce5a826376df738096d9033 Mon Sep 17 00:00:00 2001
+From: Shouzheng Liu <lshzh.hi@gmail.com>
+Date: Tue, 22 Aug 2023 02:18:40 -0400
+Subject: [PATCH] metal : add missing barriers for mul-mat (#2699)
+
+---
+ ggml-metal.metal | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/ggml-metal.metal b/ggml-metal.metal
+index 88d48f6..ce3541f 100644
+--- a/ggml-metal.metal
++++ b/ggml-metal.metal
+@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
+     //load data and store to threadgroup memory
+     half4x4 temp_a;
+     dequantize_func(x, il, temp_a);
++    threadgroup_barrier(mem_flags::mem_threadgroup);
+ #pragma unroll(16)
+     for (int i = 0; i < 16; i++) {
+         *(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
+@@ -1895,14 +1896,14 @@ kernel void kernel_mul_mm(device const uchar * src0,
+     }
+ } else {
+     // block is smaller than 64x32, we should avoid writing data outside of the matrix
++    threadgroup_barrier(mem_flags::mem_threadgroup);
+     threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+                                   + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
+     for (int i = 0; i < 8; i++) {
+-        threadgroup_barrier(mem_flags::mem_device);
+         simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
+     }
+
+-    threadgroup_barrier(mem_flags::mem_device);
++    threadgroup_barrier(mem_flags::mem_threadgroup);
+     device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
+     if (sgitg==0) {
+         for (int i = 0; i < n_rows; i++) {
+--
+2.41.0
+
```
scripts/build_darwin.sh

```diff
@@ -6,8 +6,11 @@ GO_LDFLAGS="-X github.com/jmorganca/ollama/version.Version=$VERSION"
 GO_LDFLAGS="$GO_LDFLAGS -X github.com/jmorganca/ollama/server.mode=release"
 
 # build universal binary
-CGO_ENABLED=1 GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
-CGO_ENABLED=1 GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
+GOARCH=arm64 go generate ./...
+GOARCH=arm64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-arm64
+rm -rf llm/llama.cpp/ggml/build/*/bin
+GOARCH=amd64 go generate ./...
+GOARCH=amd64 go build -ldflags "$GO_LDFLAGS" -o dist/ollama-darwin-amd64
 lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
 rm dist/ollama-darwin-amd64 dist/ollama-darwin-arm64
 codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
```
server/images.go (150 lines changed)

```diff
@@ -22,6 +22,8 @@ import (
 	"strings"
 	"text/template"
 
+	"golang.org/x/exp/slices"
+
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
```
```diff
@@ -39,15 +41,18 @@ type RegistryOptions struct {
 }
 
 type Model struct {
-	Name         string `json:"name"`
-	ModelPath    string
-	AdapterPaths []string
-	Template     string
-	System       string
-	Digest       string
-	ConfigDigest string
-	Options      map[string]interface{}
-	Embeddings   []vector.Embedding
+	Name          string `json:"name"`
+	ShortName     string
+	ModelPath     string
+	OriginalModel string
+	AdapterPaths  []string
+	Template      string
+	System        string
+	License       []string
+	Digest        string
+	ConfigDigest  string
+	Options       map[string]interface{}
+	Embeddings    []vector.Embedding
 }
 
 func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) {
```
```diff
@@ -111,6 +116,7 @@ type LayerReader struct {
 type ConfigV2 struct {
 	ModelFamily llm.ModelFamily `json:"model_family"`
 	ModelType   string          `json:"model_type"`
+	ModelFormat string          `json:"model_format"`
 	FileType    string          `json:"file_type"`
 	RootFS      RootFS          `json:"rootfs"`
```
```diff
@@ -169,9 +175,11 @@ func GetModel(name string) (*Model, error) {
 
 	model := &Model{
 		Name:         mp.GetFullTagname(),
+		ShortName:    mp.GetShortTagname(),
 		Digest:       digest,
 		ConfigDigest: manifest.Config.Digest,
 		Template:     "{{ .Prompt }}",
+		License:      []string{},
 	}
 
 	for _, layer := range manifest.Layers {
```
```diff
@@ -183,6 +191,7 @@ func GetModel(name string) (*Model, error) {
 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
 			model.ModelPath = filename
+			model.OriginalModel = layer.From
 		case "application/vnd.ollama.image.embed":
 			file, err := os.Open(filename)
 			if err != nil {
```
```diff
@@ -227,6 +236,12 @@ func GetModel(name string) (*Model, error) {
 			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
 				return nil, err
 			}
+		case "application/vnd.ollama.image.license":
+			bts, err := os.ReadFile(filename)
+			if err != nil {
+				return nil, err
+			}
+			model.License = append(model.License, string(bts))
 		}
 	}
```
```diff
@@ -274,6 +289,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 
 	var layers []*LayerReader
 	params := make(map[string][]string)
+	var sourceParams map[string]any
 	embed := EmbeddingParams{fn: fn}
 	for _, c := range commands {
 		log.Printf("[%s] - %s\n", c.Name, c.Args)
```
```diff
@@ -320,6 +336,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 
 			config.ModelFamily = ggml.ModelFamily()
 			config.ModelType = ggml.ModelType().String()
+			config.ModelFormat = ggml.Name()
 			config.FileType = ggml.FileType().String()
 
 			// reset the file
```
```diff
@@ -351,12 +368,30 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 				return err
 			}
 
-			// copie the model metadata
+			// copy the model metadata
 			config.ModelFamily = source.ModelFamily
 			config.ModelType = source.ModelType
+			config.ModelFormat = source.ModelFormat
 			config.FileType = source.FileType
 
 			for _, l := range mf.Layers {
+				if l.MediaType == "application/vnd.ollama.image.params" {
+					sourceParamsBlobPath, err := GetBlobsPath(l.Digest)
+					if err != nil {
+						return err
+					}
+
+					sourceParamsBlob, err := os.Open(sourceParamsBlobPath)
+					if err != nil {
+						return err
+					}
+					defer sourceParamsBlob.Close()
+
+					if err := json.NewDecoder(sourceParamsBlob).Decode(&sourceParams); err != nil {
+						return err
+					}
+				}
+
 				newLayer, err := GetLayerWithBufferFromLayer(l)
 				if err != nil {
 					return err
```
```diff
@@ -427,12 +462,19 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 	// Create a single layer for the parameters
 	if len(params) > 0 {
 		fn(api.ProgressResponse{Status: "creating parameter layer"})
 
+		layers = removeLayerFromLayers(layers, "application/vnd.ollama.image.params")
 		formattedParams, err := formatParams(params)
 		if err != nil {
 			return fmt.Errorf("couldn't create params json: %v", err)
 		}
 
+		for k, v := range sourceParams {
+			if _, ok := formattedParams[k]; !ok {
+				formattedParams[k] = v
+			}
+		}
+
 		bts, err := json.Marshal(formattedParams)
 		if err != nil {
 			return err
```
```diff
@@ -630,14 +672,9 @@ func existingFileEmbeddings(digest string) (map[string][]float64, error) {
 }
 
 func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
-	j := 0
-	for _, l := range layers {
-		if l.MediaType != mediaType {
-			layers[j] = l
-			j++
-		}
-	}
-	return layers[:j]
+	return slices.DeleteFunc(layers, func(layer *LayerReader) bool {
+		return layer.MediaType == mediaType
+	})
 }
 
 func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force bool) error {
```
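The rewritten helper relies on `slices.DeleteFunc` from the newly added `golang.org/x/exp` dependency. A standalone sketch of the same filtering behavior (media types here are plain strings for illustration):

```go
package main

import (
	"fmt"

	"golang.org/x/exp/slices"
)

func main() {
	layers := []string{"model", "params", "template", "params"}
	// DeleteFunc filters in place and returns the shortened slice,
	// matching the hand-rolled loop the diff replaces.
	layers = slices.DeleteFunc(layers, func(m string) bool {
		return m == "params"
	})
	fmt.Println(layers) // [model template]
}
```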
```diff
@@ -911,6 +948,83 @@ func DeleteModel(name string) error {
 	return nil
 }
 
+func ShowModelfile(model *Model) (string, error) {
+	type modelTemplate struct {
+		*Model
+		From   string
+		Params string
+	}
+
+	var params []string
+	for k, v := range model.Options {
+		switch val := v.(type) {
+		case string:
+			params = append(params, fmt.Sprintf("PARAMETER %s %s", k, val))
+		case int:
+			params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.Itoa(val)))
+		case float64:
+			params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatFloat(val, 'f', 0, 64)))
+		case bool:
+			params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatBool(val)))
+		case []interface{}:
+			for _, nv := range val {
+				switch nval := nv.(type) {
+				case string:
+					params = append(params, fmt.Sprintf("PARAMETER %s %s", k, nval))
+				case int:
+					params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.Itoa(nval)))
+				case float64:
+					params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatFloat(nval, 'f', 0, 64)))
+				case bool:
+					params = append(params, fmt.Sprintf("PARAMETER %s %s", k, strconv.FormatBool(nval)))
+				default:
+					log.Printf("unknown type: %s", reflect.TypeOf(nv).String())
+				}
+			}
+		default:
+			log.Printf("unknown type: %s", reflect.TypeOf(v).String())
+		}
+	}
+
+	mt := modelTemplate{
+		Model:  model,
+		From:   model.OriginalModel,
+		Params: strings.Join(params, "\n"),
+	}
+
+	if mt.From == "" {
+		mt.From = model.ModelPath
+	}
+
+	modelFile := `# Modelfile generated by "ollama show"
+# To build a new Modelfile based on this one, replace the FROM line with:
+# FROM {{ .ShortName }}
+
+FROM {{ .From }}
+TEMPLATE """{{ .Template }}"""
+SYSTEM """{{ .System }}"""
+{{ .Params }}
+`
+	for _, l := range mt.Model.AdapterPaths {
+		modelFile += fmt.Sprintf("ADAPTER %s\n", l)
+	}
+
+	tmpl, err := template.New("").Parse(modelFile)
+	if err != nil {
+		log.Printf("error parsing template: %q", err)
+		return "", err
+	}
+
+	var buf bytes.Buffer
+
+	if err = tmpl.Execute(&buf, mt); err != nil {
+		log.Printf("error executing template: %q", err)
+		return "", err
+	}
+
+	return buf.String(), nil
+}
+
 func PushModel(ctx context.Context, name string, regOpts *RegistryOptions, fn func(api.ProgressResponse)) error {
 	mp := ParseModelPath(name)
 	fn(api.ProgressResponse{Status: "retrieving manifest"})
```
server/modelpath.go

```diff
@@ -114,7 +114,12 @@ func GetManifestPath() (string, error) {
 		return "", err
 	}
 
-	return filepath.Join(home, ".ollama", "models", "manifests"), nil
+	path := filepath.Join(home, ".ollama", "models", "manifests")
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		return "", err
+	}
+
+	return path, nil
 }
 
 func GetBlobsPath(digest string) (string, error) {
```
server/routes.go

```diff
@@ -12,6 +12,7 @@ import (
 	"os/signal"
 	"path/filepath"
 	"reflect"
+	"strconv"
 	"strings"
 	"sync"
 	"syscall"
```
```diff
@@ -117,12 +118,13 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, ses
 		if err != nil {
 			return err
 		}
 
 		tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
 		if err != nil {
 			return err
 		}
 
-		opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+		opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem)
 
 		llmModel.SetOptions(opts)
 	}
```
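`NumKeep` is meant to pin exactly the system-prompt tokens at the head of the context window; a toy illustration of the arithmetic with invented token counts:

```go
package main

import "fmt"

func main() {
	// Hypothetical token counts for the same prompt rendered with and
	// without the system prompt.
	tokensWithSystem := make([]int, 38)
	tokensNoSystem := make([]int, 26)

	// After this change NumKeep is the exact difference; the previous
	// "+ 1" kept one extra token.
	numKeep := len(tokensWithSystem) - len(tokensNoSystem)
	fmt.Println(numKeep) // 12
}
```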
```diff
@@ -363,6 +365,77 @@ func DeleteModelHandler(c *gin.Context) {
 	}
 }
 
+func ShowModelHandler(c *gin.Context) {
+	var req api.ShowRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	resp, err := GetModelInfo(req.Name)
+	if err != nil {
+		if os.IsNotExist(err) {
+			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
+		} else {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		}
+		return
+	}
+
+	c.JSON(http.StatusOK, resp)
+}
+
+func GetModelInfo(name string) (*api.ShowResponse, error) {
+	model, err := GetModel(name)
+	if err != nil {
+		return nil, err
+	}
+
+	resp := &api.ShowResponse{
+		License:  strings.Join(model.License, "\n"),
+		System:   model.System,
+		Template: model.Template,
+	}
+
+	mf, err := ShowModelfile(model)
+	if err != nil {
+		return nil, err
+	}
+
+	resp.Modelfile = mf
+
+	var params []string
+	cs := 30
+	for k, v := range model.Options {
+		switch val := v.(type) {
+		case string:
+			params = append(params, fmt.Sprintf("%-*s %s", cs, k, val))
+		case int:
+			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(val)))
+		case float64:
+			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(val, 'f', 0, 64)))
+		case bool:
+			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(val)))
+		case []interface{}:
+			for _, nv := range val {
+				switch nval := nv.(type) {
+				case string:
+					params = append(params, fmt.Sprintf("%-*s %s", cs, k, nval))
+				case int:
+					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(nval)))
+				case float64:
+					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(nval, 'f', 0, 64)))
+				case bool:
+					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(nval)))
+				}
+			}
+		}
+	}
+	resp.Parameters = strings.Join(params, "\n")
+
+	return resp, nil
+}
+
 func ListModelsHandler(c *gin.Context) {
 	var models []api.ModelResponse
 	fp, err := GetManifestPath()
```
```diff
@@ -456,6 +529,7 @@ func Serve(ln net.Listener, origins []string) error {
 	r.POST("/api/copy", CopyModelHandler)
 	r.GET("/api/tags", ListModelsHandler)
 	r.DELETE("/api/delete", DeleteModelHandler)
+	r.POST("/api/show", ShowModelHandler)
 
 	log.Printf("Listening on %s", ln.Addr())
 	s := &http.Server{
```
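With the route registered, the new endpoint can be exercised directly; a sketch of a request (the model name is an assumed example):

```
curl -X POST http://localhost:11434/api/show -d '{"name": "llama2"}'
```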