mirror of
https://github.com/ollama/ollama.git
synced 2026-01-10 08:28:20 -05:00
Compare commits
2 Commits
ollama.com
...
bmizerany/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cfd4152eb6 | ||
|
|
0fbb379373 |
60
.github/ISSUE_TEMPLATE/10_bug_report.yml
vendored
60
.github/ISSUE_TEMPLATE/10_bug_report.yml
vendored
@@ -1,60 +0,0 @@
|
||||
name: Bug report
|
||||
labels: [bug]
|
||||
description: Something isn't working right.
|
||||
body:
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: What is the issue?
|
||||
description: What happened? What did you expect to happen?
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
description: Which operating system are you using?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- macOS
|
||||
- Windows
|
||||
- Docker
|
||||
- WSL2
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: gpu
|
||||
attributes:
|
||||
label: GPU
|
||||
description: Which GPU are you using?
|
||||
multiple: true
|
||||
options:
|
||||
- Nvidia
|
||||
- AMD
|
||||
- Intel
|
||||
- Apple
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: cpu
|
||||
attributes:
|
||||
label: CPU
|
||||
description: Which CPU are you using?
|
||||
multiple: true
|
||||
options:
|
||||
- Intel
|
||||
- AMD
|
||||
- Apple
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: Ollama version
|
||||
description: What version of Ollama are you using? (`ollama --version`)
|
||||
placeholder: e.g., 0.1.32
|
||||
validations:
|
||||
required: false
|
||||
18
.github/ISSUE_TEMPLATE/10_model_request.yml
vendored
Normal file
18
.github/ISSUE_TEMPLATE/10_model_request.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
name: Model request
|
||||
description: Request a new model for the library
|
||||
labels: [mr]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please check if your Model request is [already available](https://ollama.com/search) or that you cannot [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
|
||||
Tell us about which Model you'd like to see in the library!
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: What model would you like?
|
||||
description: Please provide a link to the model.
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for filing a model request!
|
||||
6
.github/ISSUE_TEMPLATE/20_feature_request.md
vendored
6
.github/ISSUE_TEMPLATE/20_feature_request.md
vendored
@@ -1,6 +0,0 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Request a new feature
|
||||
labels: feature request
|
||||
---
|
||||
|
||||
41
.github/ISSUE_TEMPLATE/20_feature_request.yml
vendored
Normal file
41
.github/ISSUE_TEMPLATE/20_feature_request.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Feature request
|
||||
description: Propose a new feature
|
||||
labels: [needs-triage, fr]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
|
||||
Tell us about your idea!
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: What are you trying to do?
|
||||
description: Tell us about the problem you're trying to solve.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: solution
|
||||
attributes:
|
||||
label: How should we solve this?
|
||||
description: If you have an idea of how you'd like to see this feature work, let us know.
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: alternative
|
||||
attributes:
|
||||
label: What is the impact of not solving this?
|
||||
description: (How) Are you currently working around the issue?
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: context
|
||||
attributes:
|
||||
label: Anything else?
|
||||
description: Any additional context to share, e.g., links
|
||||
validations:
|
||||
required: false
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for filing a feature request!
|
||||
5
.github/ISSUE_TEMPLATE/30_model_request.md
vendored
5
.github/ISSUE_TEMPLATE/30_model_request.md
vendored
@@ -1,5 +0,0 @@
|
||||
---
|
||||
name: Model request
|
||||
about: Request support for a new model to be added to Ollama
|
||||
labels: model request
|
||||
---
|
||||
125
.github/ISSUE_TEMPLATE/90_bug_report.yml
vendored
Normal file
125
.github/ISSUE_TEMPLATE/90_bug_report.yml
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
name: Bug report
|
||||
description: File a bug report. If you need help, please join our Discord server.
|
||||
labels: [needs-triage, bug]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What is the issue?
|
||||
description: What happened? What did you expect to happen?
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: what-was-expected
|
||||
attributes:
|
||||
label: What did you expect to see?
|
||||
description: What did you expect to see/happen instead?
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: steps
|
||||
attributes:
|
||||
label: Steps to reproduce
|
||||
description: What are the steps you took that hit this issue?
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: changes
|
||||
attributes:
|
||||
label: Are there any recent changes that introduced the issue?
|
||||
description: If so, what are those changes?
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
description: What OS are you using? You may select more than one.
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- macOS
|
||||
- Windows
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: architecture
|
||||
attributes:
|
||||
label: Architecture
|
||||
description: What architecture are you using? You may select more than one.
|
||||
multiple: true
|
||||
options:
|
||||
- arm64
|
||||
- amd64
|
||||
- x86
|
||||
- Other
|
||||
- type: dropdown
|
||||
id: platform
|
||||
attributes:
|
||||
label: Platform
|
||||
description: What platform are you using? You may select more than one.
|
||||
multiple: true
|
||||
options:
|
||||
- Docker
|
||||
- WSL
|
||||
- WSL2
|
||||
validations:
|
||||
required: false
|
||||
- type: input
|
||||
id: ollama-version
|
||||
attributes:
|
||||
label: Ollama version
|
||||
description: What Ollama version are you using? (`ollama --version`)
|
||||
placeholder: e.g., 1.14.4
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: gpu
|
||||
attributes:
|
||||
label: GPU
|
||||
description: What GPU, if any, are you using? You may select more than one.
|
||||
multiple: true
|
||||
options:
|
||||
- Nvidia
|
||||
- AMD
|
||||
- Intel
|
||||
- Apple
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: gpu-info
|
||||
attributes:
|
||||
label: GPU info
|
||||
description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: cpu
|
||||
attributes:
|
||||
label: CPU
|
||||
description: What CPU are you using? You may select more than one.
|
||||
multiple: true
|
||||
options:
|
||||
- Intel
|
||||
- AMD
|
||||
- Apple
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: other-software
|
||||
attributes:
|
||||
label: Other software
|
||||
description: What other software are you using that might be related to this issue?
|
||||
validations:
|
||||
required: false
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for filing a bug report!
|
||||
@@ -60,6 +60,7 @@ Here are some example models that can be downloaded:
|
||||
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
|
||||
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
|
||||
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
|
||||
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
|
||||
| LLaVA | 7B | 4.5GB | `ollama run llava` |
|
||||
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
|
||||
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
|
||||
@@ -377,6 +378,3 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
|
||||
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
|
||||
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
|
||||
|
||||
### Supported backends
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
|
||||
|
||||
@@ -20,8 +20,8 @@ import (
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"ollama.com/format"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
// Client encapsulates client state for interacting with the ollama
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"ollama.com/app/store"
|
||||
"ollama.com/app/tray"
|
||||
"github.com/ollama/ollama/app/store"
|
||||
"github.com/ollama/ollama/app/tray"
|
||||
)
|
||||
|
||||
func Run() {
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func getCLIFullPath(command string) string {
|
||||
|
||||
@@ -18,8 +18,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ollama.com/auth"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/auth"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -4,7 +4,7 @@ package main
|
||||
// go build -ldflags="-H windowsgui" .
|
||||
|
||||
import (
|
||||
"ollama.com/app/lifecycle"
|
||||
"github.com/ollama/ollama/app/lifecycle"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -4,8 +4,8 @@ import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
|
||||
"ollama.com/app/assets"
|
||||
"ollama.com/app/tray/commontray"
|
||||
"github.com/ollama/ollama/app/assets"
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
func NewTray() (commontray.OllamaTray, error) {
|
||||
|
||||
@@ -5,7 +5,7 @@ package tray
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"ollama.com/app/tray/commontray"
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
)
|
||||
|
||||
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package tray
|
||||
|
||||
import (
|
||||
"ollama.com/app/tray/commontray"
|
||||
"ollama.com/app/tray/wintray"
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
"github.com/ollama/ollama/app/tray/wintray"
|
||||
)
|
||||
|
||||
func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
|
||||
|
||||
@@ -13,8 +13,8 @@ import (
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/app/tray/commontray"
|
||||
"golang.org/x/sys/windows"
|
||||
"ollama.com/app/tray/commontray"
|
||||
)
|
||||
|
||||
// Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
|
||||
|
||||
12
cmd/cmd.go
12
cmd/cmd.go
@@ -30,12 +30,12 @@ import (
|
||||
"golang.org/x/exp/slices"
|
||||
"golang.org/x/term"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/format"
|
||||
"ollama.com/parser"
|
||||
"ollama.com/progress"
|
||||
"ollama.com/server"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/progress"
|
||||
"github.com/ollama/ollama/server"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
|
||||
@@ -14,9 +14,9 @@ import (
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/progress"
|
||||
"ollama.com/readline"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/progress"
|
||||
"github.com/ollama/ollama/readline"
|
||||
)
|
||||
|
||||
type MultilineState int
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestExtractFilenames(t *testing.T) {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func startApp(ctx context.Context, client *api.Client) error {
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func startApp(ctx context.Context, client *api.Client) error {
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func startApp(ctx context.Context, client *api.Client) error {
|
||||
|
||||
@@ -13,8 +13,8 @@ import (
|
||||
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"ollama.com/convert/sentencepiece"
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/convert/sentencepiece"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type Params struct {
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type GemmaModel struct {
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"github.com/pdevine/tensor/native"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type LlamaModel struct {
|
||||
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
"github.com/pdevine/tensor/native"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type MistralModel struct {
|
||||
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type safetensorWriterTo struct {
|
||||
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
"github.com/nlpodyssey/gopickle/types"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"ollama.com/llm"
|
||||
"github.com/ollama/ollama/llm"
|
||||
)
|
||||
|
||||
type torchWriterTo struct {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"ollama.com/format"
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
type handles struct {
|
||||
|
||||
@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
|
||||
return memInfo{
|
||||
TotalMemory: uint64(C.getPhysicalMemory()),
|
||||
FreeMemory: 0,
|
||||
DeviceCount: 1,
|
||||
DeviceCount: 0,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestOrcaMiniBlueSky(t *testing.T) {
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestContextExhaustion(t *testing.T) {
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/stretchr/testify/require"
|
||||
"ollama.com/api"
|
||||
)
|
||||
|
||||
func TestIntegrationMultimodal(t *testing.T) {
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
|
||||
|
||||
@@ -21,9 +21,9 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/app/lifecycle"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"ollama.com/api"
|
||||
"ollama.com/app/lifecycle"
|
||||
)
|
||||
|
||||
func FindPort() string {
|
||||
|
||||
33
llm/ext_server/server.cpp
vendored
33
llm/ext_server/server.cpp
vendored
@@ -39,10 +39,6 @@
|
||||
#include "httplib.h"
|
||||
#include "json.hpp"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
@@ -2774,28 +2770,8 @@ inline void signal_handler(int signal) {
|
||||
shutdown_handler(signal);
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
char* wchar_to_char(const wchar_t* wstr) {
|
||||
if (wstr == nullptr) return nullptr;
|
||||
|
||||
// Determine the number of bytes needed for the UTF-8 string
|
||||
int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
|
||||
char* str = new char[bytes];
|
||||
|
||||
// Convert the wide-character string to a UTF-8 string
|
||||
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
|
||||
return str;
|
||||
}
|
||||
|
||||
int wmain(int argc, wchar_t **wargv) {
|
||||
char** argv = new char*[argc];
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
argv[i] = wchar_to_char(wargv[i]);
|
||||
}
|
||||
#else
|
||||
int main(int argc, char **argv) {
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if SERVER_VERBOSE != 1
|
||||
log_disable();
|
||||
#endif
|
||||
@@ -3306,11 +3282,6 @@ int main(int argc, char **argv) {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
};
|
||||
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
delete[] argv[i];
|
||||
}
|
||||
delete[] argv;
|
||||
#endif
|
||||
llama.queue_tasks.start_loop();
|
||||
svr.stop();
|
||||
|
||||
@@ -164,8 +164,7 @@ func (ts Tensors) Layers() map[string]Layer {
|
||||
for _, t := range ts {
|
||||
parts := strings.Split(t.Name, ".")
|
||||
if parts[0] == "blk" {
|
||||
// join first and second part, e.g. blk.%d
|
||||
parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
|
||||
parts = parts[1:]
|
||||
}
|
||||
|
||||
if _, ok := layers[parts[0]]; !ok {
|
||||
@@ -381,12 +380,6 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
)
|
||||
|
||||
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
|
||||
case "stablelm":
|
||||
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
|
||||
partialOffload = max(
|
||||
4*batch*(vocab+2*embedding),
|
||||
fullOffload,
|
||||
)
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
21
llm/gguf.go
21
llm/gguf.go
@@ -248,17 +248,13 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
}
|
||||
|
||||
padding := llm.padding(offset, int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := llm.padding(int64(tensor.size()), int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
|
||||
if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -627,9 +623,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
return err
|
||||
}
|
||||
|
||||
var alignment int64 = 32
|
||||
padding := llm.padding(offset, alignment)
|
||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
|
||||
padding := llm.padding(offset, 32)
|
||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -643,8 +638,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := llm.padding(offset, alignment)
|
||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
|
||||
padding := llm.padding(offset, 32)
|
||||
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -653,5 +648,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
|
||||
}
|
||||
|
||||
func (gguf) padding(offset, align int64) int64 {
|
||||
return (align - offset%align) % align
|
||||
return (offset + align - 1) / align * align
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"golang.org/x/exp/slices"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"ollama.com/gpu"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
)
|
||||
|
||||
var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama")
|
||||
|
||||
@@ -21,9 +21,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/format"
|
||||
"ollama.com/gpu"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
)
|
||||
|
||||
// LlamaServer is an instance of the llama.cpp server
|
||||
@@ -79,9 +79,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||
graphFullOffload = graphPartialOffload
|
||||
}
|
||||
|
||||
graphFullOffload *= uint64(info.DeviceCount)
|
||||
graphPartialOffload *= uint64(info.DeviceCount)
|
||||
|
||||
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
|
||||
memoryRequiredTotal := memoryMinimum + graphFullOffload
|
||||
|
||||
@@ -97,7 +94,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||
var layerCount int
|
||||
layers := ggml.Tensors().Layers()
|
||||
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
||||
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
||||
memoryLayer := layers[fmt.Sprintf("%d", i)].size()
|
||||
|
||||
// KV is proportional to the number of layers
|
||||
memoryLayer += kv / ggml.KV().BlockCount()
|
||||
@@ -109,13 +106,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||
}
|
||||
}
|
||||
|
||||
var memoryLayerOutput uint64
|
||||
for k, v := range layers {
|
||||
if !strings.HasPrefix(k, "blk.") {
|
||||
memoryLayerOutput += v.size()
|
||||
}
|
||||
}
|
||||
|
||||
memoryLayerOutput := layers["output"].size()
|
||||
memoryRequiredTotal += memoryLayerOutput
|
||||
|
||||
if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
|
||||
@@ -130,47 +121,16 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
|
||||
opts.NumGPU = layerCount
|
||||
}
|
||||
|
||||
memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
|
||||
|
||||
slog.Info(
|
||||
"offload to gpu",
|
||||
slog.Group(
|
||||
"layers",
|
||||
// actual number of layers offloaded
|
||||
"real", opts.NumGPU,
|
||||
// estimated number of layers that can be offloaded
|
||||
"estimate", layerCount,
|
||||
),
|
||||
slog.Group(
|
||||
"memory",
|
||||
// memory available for offloading
|
||||
"available", format.HumanBytes2(memoryAvailable),
|
||||
slog.Group(
|
||||
"required",
|
||||
// memory required for full offloading
|
||||
"full", format.HumanBytes2(memoryRequiredTotal),
|
||||
// memory required to offload layers.estimate layers
|
||||
"partial", format.HumanBytes2(memoryRequiredPartial),
|
||||
// memory of KV cache
|
||||
"kv", format.HumanBytes2(kv),
|
||||
),
|
||||
slog.Group(
|
||||
"weights",
|
||||
// memory of the weights
|
||||
"total", format.HumanBytes2(memoryWeights),
|
||||
// memory of repeating layers
|
||||
"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
|
||||
// memory of non-repeating layers
|
||||
"nonrepeating", format.HumanBytes2(memoryLayerOutput),
|
||||
),
|
||||
slog.Group(
|
||||
"graph",
|
||||
// memory of graph when fully offloaded
|
||||
"full", format.HumanBytes2(graphFullOffload),
|
||||
// memory of graph when not fully offloaded
|
||||
"partial", format.HumanBytes2(graphPartialOffload),
|
||||
),
|
||||
),
|
||||
"reallayers", opts.NumGPU,
|
||||
"layers", layerCount,
|
||||
"required", format.HumanBytes2(memoryRequiredTotal),
|
||||
"used", format.HumanBytes2(memoryRequiredPartial),
|
||||
"available", format.HumanBytes2(memoryAvailable),
|
||||
"kv", format.HumanBytes2(kv),
|
||||
"fulloffload", format.HumanBytes2(graphFullOffload),
|
||||
"partialoffload", format.HumanBytes2(graphPartialOffload),
|
||||
)
|
||||
|
||||
if len(adapters) > 1 {
|
||||
|
||||
2
main.go
2
main.go
@@ -3,8 +3,8 @@ package main
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
"github.com/spf13/cobra"
|
||||
"ollama.com/cmd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
type Error struct {
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
"golang.org/x/term"
|
||||
"ollama.com/format"
|
||||
)
|
||||
|
||||
type Bar struct {
|
||||
|
||||
@@ -15,8 +15,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/auth"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/auth"
|
||||
)
|
||||
|
||||
type registryChallenge struct {
|
||||
|
||||
@@ -21,8 +21,8 @@ import (
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/format"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
)
|
||||
|
||||
const maxRetries = 6
|
||||
|
||||
@@ -24,12 +24,12 @@ import (
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/convert"
|
||||
"ollama.com/format"
|
||||
"ollama.com/llm"
|
||||
"ollama.com/parser"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/convert"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
type registryOptions struct {
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"text/template"
|
||||
"text/template/parse"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// isResponseNode checks if the node contains .Response
|
||||
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"ollama.com/api"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestPrompt(t *testing.T) {
|
||||
|
||||
@@ -27,12 +27,12 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/gpu"
|
||||
"ollama.com/llm"
|
||||
"ollama.com/openai"
|
||||
"ollama.com/parser"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/gpu"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/openai"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
var mode string = gin.DebugMode
|
||||
|
||||
@@ -16,9 +16,9 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"ollama.com/api"
|
||||
"ollama.com/parser"
|
||||
"ollama.com/version"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/parser"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
func Test_Routes(t *testing.T) {
|
||||
|
||||
@@ -16,9 +16,9 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"ollama.com/api"
|
||||
"ollama.com/format"
|
||||
)
|
||||
|
||||
var blobUploadManager sync.Map
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"unicode"
|
||||
@@ -47,8 +48,11 @@ var (
|
||||
// Digest.
|
||||
func ParseDigest(s string) Digest {
|
||||
typ, digest, ok := strings.Cut(s, "-")
|
||||
if !ok {
|
||||
typ, digest, ok = strings.Cut(s, ":")
|
||||
}
|
||||
if ok && isValidDigestType(typ) && isValidHex(digest) {
|
||||
return Digest{s: s}
|
||||
return Digest{s: fmt.Sprintf("%s-%s", typ, digest)}
|
||||
}
|
||||
return Digest{}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"ollama.com/types/structs"
|
||||
"github.com/ollama/ollama/types/structs"
|
||||
)
|
||||
|
||||
// Errors
|
||||
@@ -521,8 +521,6 @@ func parts(s string) iter_Seq2[PartKind, string] {
|
||||
return
|
||||
}
|
||||
state, j, partLen = PartModel, i, 0
|
||||
case PartHost:
|
||||
// noop: support for host:port
|
||||
default:
|
||||
yield(PartExtraneous, s[i+1:j])
|
||||
return
|
||||
@@ -680,9 +678,6 @@ func isValidByteFor(kind PartKind, c byte) bool {
|
||||
if kind == PartNamespace && c == '.' {
|
||||
return false
|
||||
}
|
||||
if kind == PartHost && c == ':' {
|
||||
return true
|
||||
}
|
||||
if c == '.' || c == '-' {
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -40,7 +40,6 @@ var testNames = map[string]fields{
|
||||
"user/model": {namespace: "user", model: "model"},
|
||||
"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
|
||||
"example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
|
||||
"localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},
|
||||
|
||||
// invalid digest
|
||||
"mistral:latest@invalid256-": {},
|
||||
|
||||
Reference in New Issue
Block a user