...

types/model: add FilepathNoBuild
Also, add test for DisplayLongest. Also, plumb fill param to ParseName in MustParseName
2026-02-27 12:36:54 -05:00 · 2024-04-17 17:04:13 -07:00 · 2024-04-16 12:37:38 -07:00
60 changed files with 290 additions and 262 deletions
--- a/.github/ISSUE_TEMPLATE/10_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/10_bug_report.yml
@@ -1,60 +0,0 @@
-name: Bug report
-labels: [bug]
-description: Something isn't working right.
-body:
-  - type: textarea
-    id: description
-    attributes:
-      label: What is the issue?
-      description: What happened? What did you expect to happen?
-    validations:
-      required: true
-  - type: dropdown
-    id: os
-    attributes:
-      label: OS
-      description: Which operating system are you using?
-      multiple: true
-      options:
-        - Linux
-        - macOS
-        - Windows
-        - Docker
-        - WSL2
-    validations:
-      required: false
-  - type: dropdown
-    id: gpu
-    attributes:
-      label: GPU
-      description: Which GPU are you using?
-      multiple: true
-      options:
-        - Nvidia
-        - AMD
-        - Intel
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: dropdown
-    id: cpu
-    attributes:
-      label: CPU
-      description: Which CPU are you using?
-      multiple: true
-      options:
-        - Intel
-        - AMD
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: input
-    id: version
-    attributes:
-      label: Ollama version
-      description: What version of Ollama are you using? (`ollama --version`)
-      placeholder: e.g., 0.1.32
-    validations:
-      required: false
--- a/.github/ISSUE_TEMPLATE/10_model_request.yml
+++ b/.github/ISSUE_TEMPLATE/10_model_request.yml
@@ -0,0 +1,18 @@
+name: Model request
+description: Request a new model for the library
+labels: [mr]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please check whether the model you want is [already available](https://ollama.com/search) and whether you can [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
+        Tell us which model you'd like to see in the library!
+  - type: textarea
+    id: problem
+    attributes:
+      label: What model would you like?
+      description: Please provide a link to the model.
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for filing a model request!
--- a/.github/ISSUE_TEMPLATE/20_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/20_feature_request.md
@@ -1,6 +0,0 @@
---
-name: Feature request
-about: Request a new feature
-labels: feature request
---
-
--- a/.github/ISSUE_TEMPLATE/20_feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/20_feature_request.yml
@@ -0,0 +1,41 @@
+name: Feature request
+description: Propose a new feature
+labels: [needs-triage, fr]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
+        Tell us about your idea!
+  - type: textarea
+    id: problem
+    attributes:
+      label: What are you trying to do?
+      description: Tell us about the problem you're trying to solve.
+    validations:
+      required: false
+  - type: textarea
+    id: solution
+    attributes:
+      label: How should we solve this?
+      description: If you have an idea of how you'd like to see this feature work, let us know.
+    validations:
+      required: false
+  - type: textarea
+    id: alternative
+    attributes:
+      label: What is the impact of not solving this?
+      description: How are you currently working around the issue, if at all?
+    validations:
+      required: false
+  - type: textarea
+    id: context
+    attributes:
+      label: Anything else?
+      description: Any additional context to share, e.g., links
+    validations:
+      required: false
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for filing a feature request!
--- a/.github/ISSUE_TEMPLATE/30_model_request.md
+++ b/.github/ISSUE_TEMPLATE/30_model_request.md
@@ -1,5 +0,0 @@
---
-name: Model request
-about: Request support for a new model to be added to Ollama
-labels: model request
---
--- a/.github/ISSUE_TEMPLATE/90_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/90_bug_report.yml
@@ -0,0 +1,125 @@
+name: Bug report
+description: File a bug report. If you need help, please join our Discord server.
+labels: [needs-triage, bug]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What is the issue?
+      description: What happened? What did you expect to happen?
+    validations:
+      required: true
+  - type: textarea
+    id: what-was-expected
+    attributes:
+      label: What did you expect to see?
+      description: What did you expect to see/happen instead?
+    validations:
+      required: false
+  - type: textarea
+    id: steps
+    attributes:
+      label: Steps to reproduce
+      description: What are the steps you took that hit this issue?
+    validations:
+      required: false
+  - type: textarea
+    id: changes
+    attributes:
+      label: Are there any recent changes that introduced the issue?
+      description: If so, what are those changes?
+    validations:
+      required: false
+  - type: dropdown
+    id: os
+    attributes:
+      label: OS
+      description: What OS are you using? You may select more than one.
+      multiple: true
+      options:
+        - Linux
+        - macOS
+        - Windows
+        - Other
+    validations:
+      required: false
+  - type: dropdown
+    id: architecture
+    attributes:
+      label: Architecture
+      description: What architecture are you using? You may select more than one.
+      multiple: true
+      options:
+        - arm64
+        - amd64
+        - x86
+        - Other
+  - type: dropdown
+    id: platform
+    attributes:
+      label: Platform
+      description: What platform are you using? You may select more than one.
+      multiple: true
+      options:
+        - Docker
+        - WSL
+        - WSL2
+    validations:
+      required: false
+  - type: input
+    id: ollama-version
+    attributes:
+      label: Ollama version
+      description: What Ollama version are you using? (`ollama --version`)
+      placeholder: e.g., 0.1.32
+    validations:
+      required: false
+  - type: dropdown
+    id: gpu
+    attributes:
+      label: GPU
+      description: What GPU, if any, are you using? You may select more than one.
+      multiple: true
+      options:
+        - Nvidia
+        - AMD
+        - Intel
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: textarea
+    id: gpu-info
+    attributes:
+      label: GPU info
+      description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
+    validations:
+      required: false
+  - type: dropdown
+    id: cpu
+    attributes:
+      label: CPU
+      description: What CPU are you using? You may select more than one.
+      multiple: true
+      options:
+        - Intel
+        - AMD
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: textarea
+    id: other-software
+    attributes:
+      label: Other software
+      description: What other software are you using that might be related to this issue?
+    validations:
+      required: false
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for filing a bug report!
--- a/README.md
+++ b/README.md
@@ -60,6 +60,7 @@ Here are some example models that can be downloaded:
 | Llama 2 13B        | 13B        | 7.3GB | `ollama run llama2:13b`        |
 | Llama 2 70B        | 70B        | 39GB  | `ollama run llama2:70b`        |
 | Orca Mini          | 3B         | 1.9GB | `ollama run orca-mini`         |
+| Vicuna             | 7B         | 3.8GB | `ollama run vicuna`            |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
 | Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
@@ -377,6 +378,3 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
-
-### Supported backends 
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 
--- a/api/client.go
+++ b/api/client.go
@@ -20,8 +20,8 @@ import (
 	"runtime"
 	"strings"

-	"ollama.com/format"
-	"ollama.com/version"
+	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/version"
 )

 // Client encapsulates client state for interacting with the ollama
--- a/app/lifecycle/lifecycle.go
+++ b/app/lifecycle/lifecycle.go
@@ -9,8 +9,8 @@ import (
 	"os/signal"
 	"syscall"

-	"ollama.com/app/store"
-	"ollama.com/app/tray"
+	"github.com/ollama/ollama/app/store"
+	"github.com/ollama/ollama/app/tray"
 )

 func Run() {
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@@ -11,7 +11,7 @@ import (
 	"path/filepath"
 	"time"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func getCLIFullPath(command string) string {
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@@ -18,8 +18,8 @@ import (
 	"strings"
 	"time"

-	"ollama.com/auth"
-	"ollama.com/version"
+	"github.com/ollama/ollama/auth"
+	"github.com/ollama/ollama/version"
 )

 var (
--- a/app/main.go
+++ b/app/main.go
@@ -4,7 +4,7 @@ package main
 // go build -ldflags="-H windowsgui" .

 import (
-	"ollama.com/app/lifecycle"
+	"github.com/ollama/ollama/app/lifecycle"
 )

 func main() {
--- a/app/tray/tray.go
+++ b/app/tray/tray.go
@@ -4,8 +4,8 @@ import (
 	"fmt"
 	"runtime"

-	"ollama.com/app/assets"
-	"ollama.com/app/tray/commontray"
+	"github.com/ollama/ollama/app/assets"
+	"github.com/ollama/ollama/app/tray/commontray"
 )

 func NewTray() (commontray.OllamaTray, error) {
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@@ -5,7 +5,7 @@ package tray
 import (
 	"fmt"

-	"ollama.com/app/tray/commontray"
+	"github.com/ollama/ollama/app/tray/commontray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
--- a/app/tray/tray_windows.go
+++ b/app/tray/tray_windows.go
@@ -1,8 +1,8 @@
 package tray

 import (
-	"ollama.com/app/tray/commontray"
-	"ollama.com/app/tray/wintray"
+	"github.com/ollama/ollama/app/tray/commontray"
+	"github.com/ollama/ollama/app/tray/wintray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@@ -13,8 +13,8 @@ import (
 	"sync"
 	"unsafe"

+	"github.com/ollama/ollama/app/tray/commontray"
 	"golang.org/x/sys/windows"
-	"ollama.com/app/tray/commontray"
 )

 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -30,12 +30,12 @@ import (
 	"golang.org/x/exp/slices"
 	"golang.org/x/term"

-	"ollama.com/api"
-	"ollama.com/format"
-	"ollama.com/parser"
-	"ollama.com/progress"
-	"ollama.com/server"
-	"ollama.com/version"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/progress"
+	"github.com/ollama/ollama/server"
+	"github.com/ollama/ollama/version"
 )

 func CreateHandler(cmd *cobra.Command, args []string) error {
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -14,9 +14,9 @@ import (
 	"github.com/spf13/cobra"
 	"golang.org/x/exp/slices"

-	"ollama.com/api"
-	"ollama.com/progress"
-	"ollama.com/readline"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/progress"
+	"github.com/ollama/ollama/readline"
 )

 type MultilineState int
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@@ -7,7 +7,7 @@ import (

 	"github.com/stretchr/testify/assert"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func TestExtractFilenames(t *testing.T) {
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@@ -7,7 +7,7 @@ import (
 	"os/exec"
 	"strings"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@@ -6,7 +6,7 @@ import (
 	"context"
 	"fmt"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@@ -10,7 +10,7 @@ import (
 	"strings"
 	"syscall"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -13,8 +13,8 @@ import (

 	"google.golang.org/protobuf/proto"

-	"ollama.com/convert/sentencepiece"
-	"ollama.com/llm"
+	"github.com/ollama/ollama/convert/sentencepiece"
+	"github.com/ollama/ollama/llm"
 )

 type Params struct {
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -12,7 +12,7 @@ import (
 	"github.com/pdevine/tensor"
 	"github.com/pdevine/tensor/native"

-	"ollama.com/llm"
+	"github.com/ollama/ollama/llm"
 )

 type GemmaModel struct {
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -14,7 +14,7 @@ import (
 	"github.com/pdevine/tensor/native"
 	"github.com/x448/float16"

-	"ollama.com/llm"
+	"github.com/ollama/ollama/llm"
 )

 type LlamaModel struct {
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -13,7 +13,7 @@ import (
 	"github.com/pdevine/tensor/native"
 	"github.com/x448/float16"

-	"ollama.com/llm"
+	"github.com/ollama/ollama/llm"
 )

 type MistralModel struct {
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -16,7 +16,7 @@ import (
 	"github.com/mitchellh/mapstructure"
 	"github.com/x448/float16"

-	"ollama.com/llm"
+	"github.com/ollama/ollama/llm"
 )

 type safetensorWriterTo struct {
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -15,7 +15,7 @@ import (
 	"github.com/nlpodyssey/gopickle/types"
 	"github.com/x448/float16"

-	"ollama.com/llm"
+	"github.com/ollama/ollama/llm"
 )

 type torchWriterTo struct {
--- a/examples/go-chat/main.go
+++ b/examples/go-chat/main.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"log"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func main() {
--- a/examples/go-generate-streaming/main.go
+++ b/examples/go-generate-streaming/main.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"log"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func main() {
--- a/examples/go-generate/main.go
+++ b/examples/go-generate/main.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"log"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func main() {
--- a/examples/go-multimodal/main.go
+++ b/examples/go-multimodal/main.go
@@ -6,7 +6,7 @@ import (
 	"log"
 	"os"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func main() {
--- a/examples/go-pull-progress/main.go
+++ b/examples/go-pull-progress/main.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"log"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func main() {
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module ollama.com
+module github.com/ollama/ollama

 go 1.22

--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -21,7 +21,7 @@ import (
 	"sync"
 	"unsafe"

-	"ollama.com/format"
+	"github.com/ollama/ollama/format"
 )

 type handles struct {
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
 	return memInfo{
 		TotalMemory: uint64(C.getPhysicalMemory()),
 		FreeMemory:  0,
-		DeviceCount: 1,
+		DeviceCount: 0,
 	}, nil
 }
--- a/integration/basic_test.go
+++ b/integration/basic_test.go
@@ -8,7 +8,7 @@ import (
 	"testing"
 	"time"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func TestOrcaMiniBlueSky(t *testing.T) {
--- a/integration/context_test.go
+++ b/integration/context_test.go
@@ -8,7 +8,7 @@ import (
 	"testing"
 	"time"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func TestContextExhaustion(t *testing.T) {
--- a/integration/llm_image_test.go
+++ b/integration/llm_image_test.go
@@ -9,8 +9,8 @@ import (
 	"testing"
 	"time"

+	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/require"
-	"ollama.com/api"
 )

 func TestIntegrationMultimodal(t *testing.T) {
--- a/integration/llm_test.go
+++ b/integration/llm_test.go
@@ -9,7 +9,7 @@ import (
 	"testing"
 	"time"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -21,9 +21,9 @@ import (
 	"testing"
 	"time"

+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/app/lifecycle"
 	"github.com/stretchr/testify/assert"
-	"ollama.com/api"
-	"ollama.com/app/lifecycle"
 )

 func FindPort() string {
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -39,10 +39,6 @@
 #include "httplib.h"
 #include "json.hpp"

-#if defined(_WIN32)
-#include <windows.h>
-#endif
-
 #include <cstddef>
 #include <thread>
 #include <chrono>
@@ -2774,28 +2770,8 @@ inline void signal_handler(int signal) {
    shutdown_handler(signal);
 }

-#if defined(_WIN32)
-char* wchar_to_char(const wchar_t* wstr) {
-    if (wstr == nullptr) return nullptr;
-
-    // Determine the number of bytes needed for the UTF-8 string
-    int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
-    char* str = new char[bytes];
-
-    // Convert the wide-character string to a UTF-8 string
-    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
-    return str;
-}
-
-int wmain(int argc, wchar_t **wargv) {
-    char** argv = new char*[argc];
-    for (int i = 0; i < argc; ++i) {
-        argv[i] = wchar_to_char(wargv[i]);
-    }
-#else
-int main(int argc, char **argv) {
-#endif
-
+int main(int argc, char **argv)
+{
 #if SERVER_VERBOSE != 1
    log_disable();
 #endif
@@ -3306,11 +3282,6 @@ int main(int argc, char **argv) {
        return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
    };
    SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
-
-    for (int i = 0; i < argc; ++i) {
-        delete[] argv[i];
-    }
-    delete[] argv;
 #endif
    llama.queue_tasks.start_loop();
    svr.stop();
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -164,8 +164,7 @@ func (ts Tensors) Layers() map[string]Layer {
 	for _, t := range ts {
 		parts := strings.Split(t.Name, ".")
 		if parts[0] == "blk" {
-			// join first and second part, e.g. blk.%d
-			parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
+			parts = parts[1:]
 		}

 		if _, ok := layers[parts[0]]; !ok {
@@ -381,12 +380,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 		)

 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
-	case "stablelm":
-		fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
-		partialOffload = max(
-			4*batch*(vocab+2*embedding),
-			fullOffload,
-		)
 	}

 	return
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -248,17 +248,13 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	}

 	padding := llm.padding(offset, int64(alignment))
-	if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
+	if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
 		return err
 	}

 	for _, tensor := range llm.tensors {
-		if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
-			return err
-		}
-
-		padding := llm.padding(int64(tensor.size()), int64(alignment))
-		if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
+		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
+		if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
 			return err
 		}
 	}
@@ -627,9 +623,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		return err
 	}

-	var alignment int64 = 32
-	padding := llm.padding(offset, alignment)
-	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
+	padding := llm.padding(offset, 32)
+	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
 		return err
 	}

@@ -643,8 +638,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 			return err
 		}

-		padding := llm.padding(offset, alignment)
-		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
+		padding := llm.padding(offset, 32)
+		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
 			return err
 		}
 	}
@@ -653,5 +648,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 }

 func (gguf) padding(offset, align int64) int64 {
-	return (align - offset%align) % align
+	return (offset + align - 1) / align * align
 }
--- a/llm/payload.go
+++ b/llm/payload.go
@@ -14,7 +14,7 @@ import (
 	"golang.org/x/exp/slices"
 	"golang.org/x/sync/errgroup"

-	"ollama.com/gpu"
+	"github.com/ollama/ollama/gpu"
 )

 var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama")
--- a/llm/server.go
+++ b/llm/server.go
@@ -21,9 +21,9 @@ import (
 	"strings"
 	"time"

-	"ollama.com/api"
-	"ollama.com/format"
-	"ollama.com/gpu"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/gpu"
 )

 // LlamaServer is an instance of the llama.cpp server
@@ -79,9 +79,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		graphFullOffload = graphPartialOffload
 	}

-	graphFullOffload *= uint64(info.DeviceCount)
-	graphPartialOffload *= uint64(info.DeviceCount)
-
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload

@@ -97,7 +94,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var layerCount int
 	layers := ggml.Tensors().Layers()
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
+		memoryLayer := layers[fmt.Sprintf("%d", i)].size()

 		// KV is proportional to the number of layers
 		memoryLayer += kv / ggml.KV().BlockCount()
@@ -109,13 +106,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		}
 	}

-	var memoryLayerOutput uint64
-	for k, v := range layers {
-		if !strings.HasPrefix(k, "blk.") {
-			memoryLayerOutput += v.size()
-		}
-	}
-
+	memoryLayerOutput := layers["output"].size()
 	memoryRequiredTotal += memoryLayerOutput

 	if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
@@ -130,47 +121,16 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumGPU = layerCount
 	}

-	memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
-
 	slog.Info(
 		"offload to gpu",
-		slog.Group(
-			"layers",
-			// actual number of layers offloaded
-			"real", opts.NumGPU,
-			// estimated number of layers that can be offloaded
-			"estimate", layerCount,
-		),
-		slog.Group(
-			"memory",
-			// memory available for offloading
-			"available", format.HumanBytes2(memoryAvailable),
-			slog.Group(
-				"required",
-				// memory required for full offloading
-				"full", format.HumanBytes2(memoryRequiredTotal),
-				// memory required to offload layers.estimate layers
-				"partial", format.HumanBytes2(memoryRequiredPartial),
-				// memory of KV cache
-				"kv", format.HumanBytes2(kv),
-			),
-			slog.Group(
-				"weights",
-				// memory of the weights
-				"total", format.HumanBytes2(memoryWeights),
-				// memory of repeating layers
-				"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
-				// memory of non-repeating layers
-				"nonrepeating", format.HumanBytes2(memoryLayerOutput),
-			),
-			slog.Group(
-				"graph",
-				// memory of graph when fully offloaded
-				"full", format.HumanBytes2(graphFullOffload),
-				// memory of graph when not fully offloaded
-				"partial", format.HumanBytes2(graphPartialOffload),
-			),
-		),
+		"reallayers", opts.NumGPU,
+		"layers", layerCount,
+		"required", format.HumanBytes2(memoryRequiredTotal),
+		"used", format.HumanBytes2(memoryRequiredPartial),
+		"available", format.HumanBytes2(memoryAvailable),
+		"kv", format.HumanBytes2(kv),
+		"fulloffload", format.HumanBytes2(graphFullOffload),
+		"partialoffload", format.HumanBytes2(graphPartialOffload),
 	)

 	if len(adapters) > 1 {
--- a/main.go
+++ b/main.go
@@ -3,8 +3,8 @@ package main
 import (
 	"context"

+	"github.com/ollama/ollama/cmd"
 	"github.com/spf13/cobra"
-	"ollama.com/cmd"
 )

 func main() {
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -11,7 +11,7 @@ import (
 	"time"

 	"github.com/gin-gonic/gin"
-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 type Error struct {
--- a/progress/bar.go
+++ b/progress/bar.go
@@ -6,8 +6,8 @@ import (
 	"strings"
 	"time"

+	"github.com/ollama/ollama/format"
 	"golang.org/x/term"
-	"ollama.com/format"
 )

 type Bar struct {
--- a/server/auth.go
+++ b/server/auth.go
@@ -15,8 +15,8 @@ import (
 	"strings"
 	"time"

-	"ollama.com/api"
-	"ollama.com/auth"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/auth"
 )

 type registryChallenge struct {
--- a/server/download.go
+++ b/server/download.go
@@ -21,8 +21,8 @@ import (

 	"golang.org/x/sync/errgroup"

-	"ollama.com/api"
-	"ollama.com/format"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
 )

 const maxRetries = 6
--- a/server/images.go
+++ b/server/images.go
@@ -24,12 +24,12 @@ import (

 	"golang.org/x/exp/slices"

-	"ollama.com/api"
-	"ollama.com/convert"
-	"ollama.com/format"
-	"ollama.com/llm"
-	"ollama.com/parser"
-	"ollama.com/version"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/convert"
+	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/version"
 )

 type registryOptions struct {
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -7,7 +7,7 @@ import (
 	"text/template"
 	"text/template/parse"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 // isResponseNode checks if the node contains .Response
--- a/server/prompt_test.go
+++ b/server/prompt_test.go
@@ -4,7 +4,7 @@ import (
 	"strings"
 	"testing"

-	"ollama.com/api"
+	"github.com/ollama/ollama/api"
 )

 func TestPrompt(t *testing.T) {
--- a/server/routes.go
+++ b/server/routes.go
@@ -27,12 +27,12 @@ import (
 	"github.com/gin-gonic/gin"
 	"golang.org/x/exp/slices"

-	"ollama.com/api"
-	"ollama.com/gpu"
-	"ollama.com/llm"
-	"ollama.com/openai"
-	"ollama.com/parser"
-	"ollama.com/version"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/gpu"
+	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/openai"
+	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/version"
 )

 var mode string = gin.DebugMode
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -16,9 +16,9 @@ import (

 	"github.com/stretchr/testify/assert"

-	"ollama.com/api"
-	"ollama.com/parser"
-	"ollama.com/version"
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/version"
 )

 func Test_Routes(t *testing.T) {
--- a/server/upload.go
+++ b/server/upload.go
@@ -16,9 +16,9 @@ import (
 	"sync/atomic"
 	"time"

+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
 	"golang.org/x/sync/errgroup"
-	"ollama.com/api"
-	"ollama.com/format"
 )

 var blobUploadManager sync.Map
--- a/types/model/digest.go
+++ b/types/model/digest.go
@@ -1,6 +1,7 @@
 package model

 import (
+	"fmt"
 	"log/slog"
 	"strings"
 	"unicode"
@@ -47,8 +48,11 @@ var (
 // Digest.
 func ParseDigest(s string) Digest {
 	typ, digest, ok := strings.Cut(s, "-")
+	if !ok {
+		typ, digest, ok = strings.Cut(s, ":")
+	}
 	if ok && isValidDigestType(typ) && isValidHex(digest) {
-		return Digest{s: s}
+		return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
 	}
 	return Digest{}
 }
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -12,7 +12,7 @@ import (
 	"strings"
 	"sync"

-	"ollama.com/types/structs"
+	"github.com/ollama/ollama/types/structs"
 )

 // Errors
@@ -521,8 +521,6 @@ func parts(s string) iter_Seq2[PartKind, string] {
 						return
 					}
 					state, j, partLen = PartModel, i, 0
-				case PartHost:
-					// noop: support for host:port
 				default:
 					yield(PartExtraneous, s[i+1:j])
 					return
@@ -680,9 +678,6 @@ func isValidByteFor(kind PartKind, c byte) bool {
 	if kind == PartNamespace && c == '.' {
 		return false
 	}
-	if kind == PartHost && c == ':' {
-		return true
-	}
 	if c == '.' || c == '-' {
 		return true
 	}
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -40,7 +40,6 @@ var testNames = map[string]fields{
 	"user/model":                     {namespace: "user", model: "model"},
 	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
 	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
-	"localhost:5000/ns/mistral":      {host: "localhost:5000", namespace: "ns", model: "mistral"},

 	// invalid digest
 	"mistral:latest@invalid256-": {},
Author	SHA1	Message	Date
Blake Mizerany	cfd4152eb6	...	2024-04-17 17:04:13 -07:00
Blake Mizerany	0fbb379373	types/model: add FilepathNoBuild Also, add test for DisplayLongest. Also, plumb fill param to ParseName in MustParseName	2024-04-16 12:37:38 -07:00