mirror of https://github.com/ollama/ollama.git (synced 2026-01-20 13:29:04 -05:00)

Compare commits: bmizerany/...ollama.com

19 Commits: 425219f1ca, 8645076a71, 05e9424824, 52ebe67a98, 889b31ab78, 3cf483fe48, c8afe7168c, 28d3cd0148, eb5554232a, 2bdc320216, 32561aed09, 71548d9829, a8b9b930b4, 9755cf9173, 9df6c85c3a, e74163af4c, fb9580df85, 26df674785, 7c9792a6e0
.github/ISSUE_TEMPLATE/10_bug_report.yml (vendored, new file, 60 lines)

```diff
@@ -0,0 +1,60 @@
+name: Bug report
+labels: [bug]
+description: Something isn't working right.
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: What is the issue?
+      description: What happened? What did you expect to happen?
+    validations:
+      required: true
+  - type: dropdown
+    id: os
+    attributes:
+      label: OS
+      description: Which operating system are you using?
+      multiple: true
+      options:
+        - Linux
+        - macOS
+        - Windows
+        - Docker
+        - WSL2
+    validations:
+      required: false
+  - type: dropdown
+    id: gpu
+    attributes:
+      label: GPU
+      description: Which GPU are you using?
+      multiple: true
+      options:
+        - Nvidia
+        - AMD
+        - Intel
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: dropdown
+    id: cpu
+    attributes:
+      label: CPU
+      description: Which CPU are you using?
+      multiple: true
+      options:
+        - Intel
+        - AMD
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: input
+    id: version
+    attributes:
+      label: Ollama version
+      description: What version of Ollama are you using? (`ollama --version`)
+      placeholder: e.g., 0.1.32
+    validations:
+      required: false
```
.github/ISSUE_TEMPLATE/10_model_request.yml (vendored, deleted, 18 lines)

```diff
@@ -1,18 +0,0 @@
-name: Model request
-description: Request a new model for the library
-labels: [mr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your Model request is [already available](https://ollama.com/search) or that you cannot [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
-        Tell us about which Model you'd like to see in the library!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What model would you like?
-      description: Please provide a link to the model.
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a model request!
```
.github/ISSUE_TEMPLATE/20_feature_request.md (vendored, new file, 6 lines)

```diff
@@ -0,0 +1,6 @@
+---
+name: Feature request
+about: Request a new feature
+labels: feature request
+---
+
```
.github/ISSUE_TEMPLATE/20_feature_request.yml (vendored, deleted, 41 lines)

```diff
@@ -1,41 +0,0 @@
-name: Feature request
-description: Propose a new feature
-labels: [needs-triage, fr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
-        Tell us about your idea!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What are you trying to do?
-      description: Tell us about the problem you're trying to solve.
-    validations:
-      required: false
-  - type: textarea
-    id: solution
-    attributes:
-      label: How should we solve this?
-      description: If you have an idea of how you'd like to see this feature work, let us know.
-    validations:
-      required: false
-  - type: textarea
-    id: alternative
-    attributes:
-      label: What is the impact of not solving this?
-      description: (How) Are you currently working around the issue?
-    validations:
-      required: false
-  - type: textarea
-    id: context
-    attributes:
-      label: Anything else?
-      description: Any additional context to share, e.g., links
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a feature request!
```
.github/ISSUE_TEMPLATE/30_model_request.md (vendored, new file, 5 lines)

```diff
@@ -0,0 +1,5 @@
+---
+name: Model request
+about: Request support for a new model to be added to Ollama
+labels: model request
+---
```
.github/ISSUE_TEMPLATE/90_bug_report.yml (vendored, deleted, 125 lines)

```diff
@@ -1,125 +0,0 @@
-name: Bug report
-description: File a bug report. If you need help, please join our Discord server.
-labels: [needs-triage, bug]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
-  - type: textarea
-    id: what-happened
-    attributes:
-      label: What is the issue?
-      description: What happened? What did you expect to happen?
-    validations:
-      required: true
-  - type: textarea
-    id: what-was-expected
-    attributes:
-      label: What did you expect to see?
-      description: What did you expect to see/happen instead?
-    validations:
-      required: false
-  - type: textarea
-    id: steps
-    attributes:
-      label: Steps to reproduce
-      description: What are the steps you took that hit this issue?
-    validations:
-      required: false
-  - type: textarea
-    id: changes
-    attributes:
-      label: Are there any recent changes that introduced the issue?
-      description: If so, what are those changes?
-    validations:
-      required: false
-  - type: dropdown
-    id: os
-    attributes:
-      label: OS
-      description: What OS are you using? You may select more than one.
-      multiple: true
-      options:
-        - Linux
-        - macOS
-        - Windows
-        - Other
-    validations:
-      required: false
-  - type: dropdown
-    id: architecture
-    attributes:
-      label: Architecture
-      description: What architecture are you using? You may select more than one.
-      multiple: true
-      options:
-        - arm64
-        - amd64
-        - x86
-        - Other
-  - type: dropdown
-    id: platform
-    attributes:
-      label: Platform
-      description: What platform are you using? You may select more than one.
-      multiple: true
-      options:
-        - Docker
-        - WSL
-        - WSL2
-    validations:
-      required: false
-  - type: input
-    id: ollama-version
-    attributes:
-      label: Ollama version
-      description: What Ollama version are you using? (`ollama --version`)
-      placeholder: e.g., 1.14.4
-    validations:
-      required: false
-  - type: dropdown
-    id: gpu
-    attributes:
-      label: GPU
-      description: What GPU, if any, are you using? You may select more than one.
-      multiple: true
-      options:
-        - Nvidia
-        - AMD
-        - Intel
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: gpu-info
-    attributes:
-      label: GPU info
-      description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
-    validations:
-      required: false
-  - type: dropdown
-    id: cpu
-    attributes:
-      label: CPU
-      description: What CPU are you using? You may select more than one.
-      multiple: true
-      options:
-        - Intel
-        - AMD
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: other-software
-    attributes:
-      label: Other software
-      description: What other software are you using that might be related to this issue?
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a bug report!
```
```diff
@@ -60,7 +60,6 @@ Here are some example models that can be downloaded:
 | Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
 | Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
 | Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
 | Vicuna | 7B | 3.8GB | `ollama run vicuna` |
 | LLaVA | 7B | 4.5GB | `ollama run llava` |
 | Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
 | Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
```
```diff
@@ -378,3 +377,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
+
+### Supported backends
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
```
```diff
@@ -20,8 +20,8 @@ import (
    "runtime"
    "strings"

-   "github.com/ollama/ollama/format"
-   "github.com/ollama/ollama/version"
+   "ollama.com/format"
+   "ollama.com/version"
 )

 // Client encapsulates client state for interacting with the ollama
```
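The hunk above is the first of many identical rewrites in this comparison: every internal import of `github.com/ollama/ollama/...` becomes `ollama.com/...`, which implies (though go.mod itself is not shown in this diff) that the `module` line was changed to `ollama.com` as well. As a hedged illustration, a throwaway helper like the following, which is not part of the change set, shows how such a sweep can be done mechanically with `go/ast`:

```go
// Hypothetical helper (not in this diff): rewrite one import-path prefix to
// another in a single Go source file and print the result.
package main

import (
    "fmt"
    "go/format"
    "go/parser"
    "go/token"
    "strconv"
    "strings"
)

const src = `package main

import (
    "github.com/ollama/ollama/api"
    "github.com/ollama/ollama/version"
)
`

func main() {
    fset := token.NewFileSet()
    f, err := parser.ParseFile(fset, "cmd.go", src, parser.ParseComments)
    if err != nil {
        panic(err)
    }
    for _, imp := range f.Imports {
        path, _ := strconv.Unquote(imp.Path.Value)
        // Swap the module prefix, keeping the package-relative suffix.
        if rest, ok := strings.CutPrefix(path, "github.com/ollama/ollama"); ok {
            imp.Path.Value = strconv.Quote("ollama.com" + rest)
        }
    }
    var out strings.Builder
    if err := format.Node(&out, fset, f); err != nil {
        panic(err)
    }
    fmt.Print(out.String())
}
```

The same `-`/`+` pair repeats through the rest of this comparison.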
```diff
@@ -9,8 +9,8 @@ import (
    "os/signal"
    "syscall"

-   "github.com/ollama/ollama/app/store"
-   "github.com/ollama/ollama/app/tray"
+   "ollama.com/app/store"
+   "ollama.com/app/tray"
 )

 func Run() {
```
```diff
@@ -11,7 +11,7 @@ import (
    "path/filepath"
    "time"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func getCLIFullPath(command string) string {
```
```diff
@@ -18,8 +18,8 @@ import (
    "strings"
    "time"

-   "github.com/ollama/ollama/auth"
-   "github.com/ollama/ollama/version"
+   "ollama.com/auth"
+   "ollama.com/version"
 )

 var (
```
```diff
@@ -4,7 +4,7 @@ package main
 // go build -ldflags="-H windowsgui" .

 import (
-   "github.com/ollama/ollama/app/lifecycle"
+   "ollama.com/app/lifecycle"
 )

 func main() {
```
```diff
@@ -4,8 +4,8 @@ import (
    "fmt"
    "runtime"

-   "github.com/ollama/ollama/app/assets"
-   "github.com/ollama/ollama/app/tray/commontray"
+   "ollama.com/app/assets"
+   "ollama.com/app/tray/commontray"
 )

 func NewTray() (commontray.OllamaTray, error) {
```
```diff
@@ -5,7 +5,7 @@ package tray
 import (
    "fmt"

-   "github.com/ollama/ollama/app/tray/commontray"
+   "ollama.com/app/tray/commontray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
```
```diff
@@ -1,8 +1,8 @@
 package tray

 import (
-   "github.com/ollama/ollama/app/tray/commontray"
-   "github.com/ollama/ollama/app/tray/wintray"
+   "ollama.com/app/tray/commontray"
+   "ollama.com/app/tray/wintray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
```
```diff
@@ -13,8 +13,8 @@ import (
    "sync"
    "unsafe"

-   "github.com/ollama/ollama/app/tray/commontray"
    "golang.org/x/sys/windows"
+   "ollama.com/app/tray/commontray"
 )

 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
```
cmd/cmd.go (12 lines changed)

```diff
@@ -30,12 +30,12 @@ import (
    "golang.org/x/exp/slices"
    "golang.org/x/term"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/format"
-   "github.com/ollama/ollama/parser"
-   "github.com/ollama/ollama/progress"
-   "github.com/ollama/ollama/server"
-   "github.com/ollama/ollama/version"
+   "ollama.com/api"
+   "ollama.com/format"
+   "ollama.com/parser"
+   "ollama.com/progress"
+   "ollama.com/server"
+   "ollama.com/version"
 )

 func CreateHandler(cmd *cobra.Command, args []string) error {
```
```diff
@@ -14,9 +14,9 @@ import (
    "github.com/spf13/cobra"
    "golang.org/x/exp/slices"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/progress"
-   "github.com/ollama/ollama/readline"
+   "ollama.com/api"
+   "ollama.com/progress"
+   "ollama.com/readline"
 )

 type MultilineState int
```
```diff
@@ -7,7 +7,7 @@ import (

    "github.com/stretchr/testify/assert"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func TestExtractFilenames(t *testing.T) {
```
```diff
@@ -7,7 +7,7 @@ import (
    "os/exec"
    "strings"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
```
```diff
@@ -6,7 +6,7 @@ import (
    "context"
    "fmt"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
```
```diff
@@ -10,7 +10,7 @@ import (
    "strings"
    "syscall"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
```
```diff
@@ -13,8 +13,8 @@ import (

    "google.golang.org/protobuf/proto"

-   "github.com/ollama/ollama/convert/sentencepiece"
-   "github.com/ollama/ollama/llm"
+   "ollama.com/convert/sentencepiece"
+   "ollama.com/llm"
 )

 type Params struct {
```
```diff
@@ -12,7 +12,7 @@ import (
    "github.com/pdevine/tensor"
    "github.com/pdevine/tensor/native"

-   "github.com/ollama/ollama/llm"
+   "ollama.com/llm"
 )

 type GemmaModel struct {
```
```diff
@@ -14,7 +14,7 @@ import (
    "github.com/pdevine/tensor/native"
    "github.com/x448/float16"

-   "github.com/ollama/ollama/llm"
+   "ollama.com/llm"
 )

 type LlamaModel struct {
```
```diff
@@ -13,7 +13,7 @@ import (
    "github.com/pdevine/tensor/native"
    "github.com/x448/float16"

-   "github.com/ollama/ollama/llm"
+   "ollama.com/llm"
 )

 type MistralModel struct {
```
```diff
@@ -16,7 +16,7 @@ import (
    "github.com/mitchellh/mapstructure"
    "github.com/x448/float16"

-   "github.com/ollama/ollama/llm"
+   "ollama.com/llm"
 )

 type safetensorWriterTo struct {
```
```diff
@@ -15,7 +15,7 @@ import (
    "github.com/nlpodyssey/gopickle/types"
    "github.com/x448/float16"

-   "github.com/ollama/ollama/llm"
+   "ollama.com/llm"
 )

 type torchWriterTo struct {
```
```diff
@@ -5,7 +5,7 @@ import (
    "fmt"
    "log"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func main() {
```
```diff
@@ -5,7 +5,7 @@ import (
    "fmt"
    "log"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func main() {
```
```diff
@@ -5,7 +5,7 @@ import (
    "fmt"
    "log"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func main() {
```
```diff
@@ -6,7 +6,7 @@ import (
    "log"
    "os"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func main() {
```
```diff
@@ -5,7 +5,7 @@ import (
    "fmt"
    "log"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func main() {
```
```diff
@@ -21,7 +21,7 @@ import (
    "sync"
    "unsafe"

-   "github.com/ollama/ollama/format"
+   "ollama.com/format"
 )

 type handles struct {
```
```diff
@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
    return memInfo{
        TotalMemory: uint64(C.getPhysicalMemory()),
        FreeMemory:  0,
-       DeviceCount: 0,
+       DeviceCount: 1,
    }, nil
 }
```
```diff
@@ -8,7 +8,7 @@ import (
    "testing"
    "time"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func TestOrcaMiniBlueSky(t *testing.T) {
```
```diff
@@ -8,7 +8,7 @@ import (
    "testing"
    "time"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func TestContextExhaustion(t *testing.T) {
```
```diff
@@ -9,8 +9,8 @@ import (
    "testing"
    "time"

-   "github.com/ollama/ollama/api"
    "github.com/stretchr/testify/require"
+   "ollama.com/api"
 )

 func TestIntegrationMultimodal(t *testing.T) {
```
```diff
@@ -9,7 +9,7 @@ import (
    "testing"
    "time"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
```
```diff
@@ -21,9 +21,9 @@ import (
    "testing"
    "time"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/app/lifecycle"
    "github.com/stretchr/testify/assert"
+   "ollama.com/api"
+   "ollama.com/app/lifecycle"
 )

 func FindPort() string {
```
llm/ext_server/server.cpp (vendored, 33 lines changed)

```diff
@@ -39,6 +39,10 @@
 #include "httplib.h"
 #include "json.hpp"

+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
 #include <cstddef>
 #include <thread>
 #include <chrono>
```
```diff
@@ -2770,8 +2774,28 @@ inline void signal_handler(int signal) {
     shutdown_handler(signal);
 }

-int main(int argc, char **argv)
-{
+#if defined(_WIN32)
+char* wchar_to_char(const wchar_t* wstr) {
+    if (wstr == nullptr) return nullptr;
+
+    // Determine the number of bytes needed for the UTF-8 string
+    int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
+    char* str = new char[bytes];
+
+    // Convert the wide-character string to a UTF-8 string
+    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
+    return str;
+}
+
+int wmain(int argc, wchar_t **wargv) {
+    char** argv = new char*[argc];
+    for (int i = 0; i < argc; ++i) {
+        argv[i] = wchar_to_char(wargv[i]);
+    }
+#else
+int main(int argc, char **argv) {
+#endif
+
 #if SERVER_VERBOSE != 1
     log_disable();
 #endif
```
```diff
@@ -3282,6 +3306,11 @@ int main(int argc, char **argv)
         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
     };
     SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
+
+    for (int i = 0; i < argc; ++i) {
+        delete[] argv[i];
+    }
+    delete[] argv;
 #endif
     llama.queue_tasks.start_loop();
     svr.stop();
```
```diff
@@ -164,7 +164,8 @@ func (ts Tensors) Layers() map[string]Layer {
    for _, t := range ts {
        parts := strings.Split(t.Name, ".")
        if parts[0] == "blk" {
-           parts = parts[1:]
+           // join first and second part, e.g. blk.%d
+           parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
        }

        if _, ok := layers[parts[0]]; !ok {
```
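With this change, block tensors group under a `blk.N` key instead of a bare layer number `N`, which is why the per-layer lookups in the server hunks further down switch to `layers[fmt.Sprintf("blk.%d", i)]`. A standalone sketch of the grouping rule, assuming tensor names follow the usual GGUF `blk.N.<suffix>` convention:

```go
// Standalone re-statement of the grouping rule above; not the real llm
// package, just the same logic applied to a few sample tensor names.
package main

import (
    "fmt"
    "strings"
)

func layerKey(name string) string {
    parts := strings.Split(name, ".")
    if parts[0] == "blk" {
        // join first and second part, e.g. blk.%d
        return fmt.Sprintf("%s.%s", parts[0], parts[1])
    }
    return parts[0]
}

func main() {
    for _, name := range []string{
        "blk.0.attn_q.weight",
        "blk.31.ffn_down.weight",
        "output.weight",
        "token_embd.weight",
    } {
        fmt.Printf("%-24s -> %s\n", name, layerKey(name))
    }
}
```

Block tensors land under `blk.0`, `blk.31`, and so on, while `output` and `token_embd` keep their own keys and no longer collide with numeric layer keys.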
```diff
@@ -380,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
        )

        partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+   case "stablelm":
+       fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
+       partialOffload = max(
+           4*batch*(vocab+2*embedding),
+           fullOffload,
+       )
    }

    return
```
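The new `stablelm` case is plain arithmetic over the model's metadata. A runnable check of the two formulas with illustrative values (the real `batch`, `context`, `heads`, `embedding`, and `vocab` come from the GGUF key/value store, so these numbers are assumptions for demonstration only):

```go
// Sanity check of the stablelm graph-size formulas added above, using
// invented model parameters. Requires Go 1.21+ for the max builtin.
package main

import "fmt"

func main() {
    var (
        batch     uint64 = 512   // n_batch
        context   uint64 = 4096  // n_ctx
        heads     uint64 = 32    // attention heads
        embedding uint64 = 2560  // embedding length
        vocab     uint64 = 50304 // vocabulary size
    )

    fullOffload := 4 * batch * (context*(1+heads) + 3*embedding + 2)
    partialOffload := max(4*batch*(vocab+2*embedding), fullOffload)

    fmt.Printf("full offload graph:    %d bytes (~%.2f GiB)\n",
        fullOffload, float64(fullOffload)/(1<<30))
    fmt.Printf("partial offload graph: %d bytes (~%.2f GiB)\n",
        partialOffload, float64(partialOffload)/(1<<30))
}
```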
llm/gguf.go (21 lines changed)

```diff
@@ -248,13 +248,17 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
    }

    padding := llm.padding(offset, int64(alignment))
-   if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
+   if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
        return err
    }

    for _, tensor := range llm.tensors {
-       padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
-       if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
+       if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
+           return err
+       }
+
+       padding := llm.padding(int64(tensor.size()), int64(alignment))
+       if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
            return err
        }
    }
```
```diff
@@ -623,8 +627,9 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
        return err
    }

-   padding := llm.padding(offset, 32)
-   if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
+   var alignment int64 = 32
+   padding := llm.padding(offset, alignment)
+   if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
        return err
    }

```
```diff
@@ -638,8 +643,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
            return err
        }

-       padding := llm.padding(offset, 32)
-       if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
+       padding := llm.padding(offset, alignment)
+       if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
            return err
        }
    }
```
```diff
@@ -648,5 +653,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 }

 func (gguf) padding(offset, align int64) int64 {
-   return (offset + align - 1) / align * align
+   return (align - offset%align) % align
 }
```
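Note that `padding` changes meaning here: the old expression returned the next aligned offset, while the new one returns the number of pad bytes needed to reach alignment, which is why the `Seek` and `bytes.Repeat` call sites above drop their `-offset` adjustments. A quick demonstration of the two formulas:

```go
// Demonstration of the two padding semantics from the hunk above.
package main

import "fmt"

// oldPadding returns the next offset aligned to align (pre-change semantics).
func oldPadding(offset, align int64) int64 {
    return (offset + align - 1) / align * align
}

// newPadding returns the number of pad bytes to reach alignment
// (post-change semantics).
func newPadding(offset, align int64) int64 {
    return (align - offset%align) % align
}

func main() {
    for _, offset := range []int64{0, 1, 31, 32, 33, 100} {
        fmt.Printf("offset=%3d  old(aligned offset)=%3d  new(pad bytes)=%2d\n",
            offset, oldPadding(offset, 32), newPadding(offset, 32))
    }
}
```

For any offset, `old(offset) - offset == new(offset)`, so the rewritten call sites skip the same number of bytes as before; the per-tensor loop in `Decode` still advances by the tensor's aligned span, now expressed as size plus pad bytes.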
```diff
@@ -14,7 +14,7 @@ import (
    "golang.org/x/exp/slices"
    "golang.org/x/sync/errgroup"

-   "github.com/ollama/ollama/gpu"
+   "ollama.com/gpu"
 )

 var errPayloadMissing = fmt.Errorf("expected payloads not included in this build of ollama")
```
```diff
@@ -21,9 +21,9 @@ import (
    "strings"
    "time"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/format"
-   "github.com/ollama/ollama/gpu"
+   "ollama.com/api"
+   "ollama.com/format"
+   "ollama.com/gpu"
 )

 // LlamaServer is an instance of the llama.cpp server
```
```diff
@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
        graphFullOffload = graphPartialOffload
    }

+   graphFullOffload *= uint64(info.DeviceCount)
+   graphPartialOffload *= uint64(info.DeviceCount)
+
    // memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
    memoryRequiredTotal := memoryMinimum + graphFullOffload

```
```diff
@@ -94,7 +97,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
    var layerCount int
    layers := ggml.Tensors().Layers()
    for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-       memoryLayer := layers[fmt.Sprintf("%d", i)].size()
+       memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

        // KV is proportional to the number of layers
        memoryLayer += kv / ggml.KV().BlockCount()
```
```diff
@@ -106,7 +109,13 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
        }
    }

-   memoryLayerOutput := layers["output"].size()
+   var memoryLayerOutput uint64
+   for k, v := range layers {
+       if !strings.HasPrefix(k, "blk.") {
+           memoryLayerOutput += v.size()
+       }
+   }
+
    memoryRequiredTotal += memoryLayerOutput

    if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
```
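Taken together with the `blk.%d` lookup above, the estimate now counts every non-block tensor group (output, token embeddings, norms, and so on) as non-repeating memory, rather than only the `"output"` layer. A toy version of that accounting, with invented layer names and sizes:

```go
// Toy version of the revised accounting in NewLlamaServer: block layers are
// keyed "blk.N"; every other key counts toward the non-repeating portion.
// All sizes here are made up for illustration.
package main

import (
    "fmt"
    "strings"
)

func main() {
    layers := map[string]uint64{
        "blk.0":      233 << 20,
        "blk.1":      233 << 20,
        "output":     102 << 20,
        "token_embd": 102 << 20,
    }

    var repeating, nonRepeating uint64
    for k, v := range layers {
        if strings.HasPrefix(k, "blk.") {
            repeating += v
        } else {
            nonRepeating += v // previously only layers["output"] was counted
        }
    }
    fmt.Printf("repeating (blk.*): %d MiB, non-repeating: %d MiB\n",
        repeating>>20, nonRepeating>>20)
}
```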
```diff
@@ -121,16 +130,47 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
        opts.NumGPU = layerCount
    }

+   memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
+
    slog.Info(
        "offload to gpu",
-       "reallayers", opts.NumGPU,
-       "layers", layerCount,
-       "required", format.HumanBytes2(memoryRequiredTotal),
-       "used", format.HumanBytes2(memoryRequiredPartial),
-       "available", format.HumanBytes2(memoryAvailable),
-       "kv", format.HumanBytes2(kv),
-       "fulloffload", format.HumanBytes2(graphFullOffload),
-       "partialoffload", format.HumanBytes2(graphPartialOffload),
+       slog.Group(
+           "layers",
+           // actual number of layers offloaded
+           "real", opts.NumGPU,
+           // estimated number of layers that can be offloaded
+           "estimate", layerCount,
+       ),
+       slog.Group(
+           "memory",
+           // memory available for offloading
+           "available", format.HumanBytes2(memoryAvailable),
+           slog.Group(
+               "required",
+               // memory required for full offloading
+               "full", format.HumanBytes2(memoryRequiredTotal),
+               // memory required to offload layers.estimate layers
+               "partial", format.HumanBytes2(memoryRequiredPartial),
+               // memory of KV cache
+               "kv", format.HumanBytes2(kv),
+           ),
+           slog.Group(
+               "weights",
+               // memory of the weights
+               "total", format.HumanBytes2(memoryWeights),
+               // memory of repeating layers
+               "repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
+               // memory of non-repeating layers
+               "nonrepeating", format.HumanBytes2(memoryLayerOutput),
+           ),
+           slog.Group(
+               "graph",
+               // memory of graph when fully offloaded
+               "full", format.HumanBytes2(graphFullOffload),
+               // memory of graph when not fully offloaded
+               "partial", format.HumanBytes2(graphPartialOffload),
+           ),
+       ),
    )

    if len(adapters) > 1 {
```
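The restructured log uses `log/slog` groups, so related estimates share a key prefix instead of living in one flat list. A minimal runnable sketch of the same shape, with made-up values standing in for the real estimates:

```go
// Minimal sketch of the nested log/slog structure used above.
package main

import "log/slog"

func main() {
    slog.Info(
        "offload to gpu",
        slog.Group(
            "layers",
            "real", -1, // actual number of layers offloaded
            "estimate", 33, // estimated number of layers that can be offloaded
        ),
        slog.Group(
            "memory",
            "available", "21.3 GiB",
            slog.Group(
                "required",
                "full", "5.4 GiB",
                "partial", "5.4 GiB",
                "kv", "256.0 MiB",
            ),
        ),
    )
}
```

With the default text handler, group names become dotted key prefixes (for example `memory.required.full=5.4 GiB`), which keeps the offload report greppable while staying structured for JSON handlers.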
main.go (2 lines changed)

```diff
@@ -3,8 +3,8 @@ package main
 import (
    "context"

-   "github.com/ollama/ollama/cmd"
    "github.com/spf13/cobra"
+   "ollama.com/cmd"
 )

 func main() {
```
```diff
@@ -11,7 +11,7 @@ import (
    "time"

    "github.com/gin-gonic/gin"
-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 type Error struct {
```
```diff
@@ -6,8 +6,8 @@ import (
    "strings"
    "time"

-   "github.com/ollama/ollama/format"
    "golang.org/x/term"
+   "ollama.com/format"
 )

 type Bar struct {
```
```diff
@@ -15,8 +15,8 @@ import (
    "strings"
    "time"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/auth"
+   "ollama.com/api"
+   "ollama.com/auth"
 )

 type registryChallenge struct {
```
```diff
@@ -21,8 +21,8 @@ import (

    "golang.org/x/sync/errgroup"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/format"
+   "ollama.com/api"
+   "ollama.com/format"
 )

 const maxRetries = 6
```
```diff
@@ -24,12 +24,12 @@ import (

    "golang.org/x/exp/slices"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/convert"
-   "github.com/ollama/ollama/format"
-   "github.com/ollama/ollama/llm"
-   "github.com/ollama/ollama/parser"
-   "github.com/ollama/ollama/version"
+   "ollama.com/api"
+   "ollama.com/convert"
+   "ollama.com/format"
+   "ollama.com/llm"
+   "ollama.com/parser"
+   "ollama.com/version"
 )

 type registryOptions struct {
```
```diff
@@ -7,7 +7,7 @@ import (
    "text/template"
    "text/template/parse"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 // isResponseNode checks if the node contains .Response
```
```diff
@@ -4,7 +4,7 @@ import (
    "strings"
    "testing"

-   "github.com/ollama/ollama/api"
+   "ollama.com/api"
 )

 func TestPrompt(t *testing.T) {
```
```diff
@@ -27,12 +27,12 @@ import (
    "github.com/gin-gonic/gin"
    "golang.org/x/exp/slices"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/gpu"
-   "github.com/ollama/ollama/llm"
-   "github.com/ollama/ollama/openai"
-   "github.com/ollama/ollama/parser"
-   "github.com/ollama/ollama/version"
+   "ollama.com/api"
+   "ollama.com/gpu"
+   "ollama.com/llm"
+   "ollama.com/openai"
+   "ollama.com/parser"
+   "ollama.com/version"
 )

 var mode string = gin.DebugMode
```
```diff
@@ -16,9 +16,9 @@ import (

    "github.com/stretchr/testify/assert"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/parser"
-   "github.com/ollama/ollama/version"
+   "ollama.com/api"
+   "ollama.com/parser"
+   "ollama.com/version"
 )

 func Test_Routes(t *testing.T) {
```
```diff
@@ -16,9 +16,9 @@ import (
    "sync/atomic"
    "time"

-   "github.com/ollama/ollama/api"
-   "github.com/ollama/ollama/format"
    "golang.org/x/sync/errgroup"
+   "ollama.com/api"
+   "ollama.com/format"
 )

 var blobUploadManager sync.Map
```
```diff
@@ -12,7 +12,7 @@ import (
    "strings"
    "sync"

-   "github.com/ollama/ollama/types/structs"
+   "ollama.com/types/structs"
 )

 // Errors
```
```diff
@@ -185,8 +185,8 @@ func parseMask(s string) Name {
    return r
 }

-func MustParseName(s, defaults string) Name {
-   r := ParseName(s, "")
+func MustParseName(s, fill string) Name {
+   r := ParseName(s, fill)
    if !r.IsValid() {
        panic("invalid Name: " + s)
    }
```
```diff
@@ -521,6 +521,8 @@ func parts(s string) iter_Seq2[PartKind, string] {
                return
            }
            state, j, partLen = PartModel, i, 0
+       case PartHost:
+           // noop: support for host:port
        default:
            yield(PartExtraneous, s[i+1:j])
            return
```
```diff
@@ -643,6 +645,15 @@ func (r Name) Filepath() string {
    return filepath.Join(r.parts[:]...)
 }

+// FilepathNoBuild returns a complete, canonicalized, relative file path using
+// the parts of a complete Name, but without the build part.
+func (r Name) FilepathNoBuild() string {
+   for i := range PartBuild {
+       r.parts[i] = strings.ToLower(r.parts[i])
+   }
+   return filepath.Join(r.parts[:PartBuild]...)
+}
+
 // isValidPart reports if s contains all valid characters for the given
 // part kind.
 func isValidPart(kind PartKind, s string) bool {
```
```diff
@@ -669,6 +680,9 @@ func isValidByteFor(kind PartKind, c byte) bool {
    if kind == PartNamespace && c == '.' {
        return false
    }
+   if kind == PartHost && c == ':' {
+       return true
+   }
    if c == '.' || c == '-' {
        return true
    }
```
```diff
@@ -40,6 +40,7 @@ var testNames = map[string]fields{
    "user/model": {namespace: "user", model: "model"},
    "example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
    "example.com/ns/mistral:7b+X": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
+   "localhost:5000/ns/mistral": {host: "localhost:5000", namespace: "ns", model: "mistral"},

    // invalid digest
    "mistral:latest@invalid256-": {},
```
```diff
@@ -318,6 +319,13 @@ func TestNameGoString(t *testing.T) {
    }
 }

+func TestDisplayLongest(t *testing.T) {
+   g := ParseName("example.com/library/mistral:latest+Q4_0", FillNothing).DisplayLongest()
+   if g != "example.com/library/mistral:latest" {
+       t.Errorf("got = %q; want %q", g, "example.com/library/mistral:latest")
+   }
+}
+
 func TestDisplayShortest(t *testing.T) {
    cases := []struct {
        in string
```
```diff
@@ -478,30 +486,36 @@ func TestNamePath(t *testing.T) {
    }
 }

-func TestNameFromFilepath(t *testing.T) {
+func TestNameFilepath(t *testing.T) {
    cases := []struct {
-       in   string
-       want string
+       in          string
+       want        string
+       wantNoBuild string
    }{
        {
-           in:   "example.com/library/mistral:latest+Q4_0",
-           want: "example.com/library/mistral/latest/Q4_0",
+           in:          "example.com/library/mistral:latest+Q4_0",
+           want:        "example.com/library/mistral/latest/Q4_0",
+           wantNoBuild: "example.com/library/mistral/latest",
        },
        {
-           in:   "Example.Com/Library/Mistral:Latest+Q4_0",
-           want: "example.com/library/mistral/latest/Q4_0",
+           in:          "Example.Com/Library/Mistral:Latest+Q4_0",
+           want:        "example.com/library/mistral/latest/Q4_0",
+           wantNoBuild: "example.com/library/mistral/latest",
        },
        {
-           in:   "Example.Com/Library/Mistral:Latest+Q4_0",
-           want: "example.com/library/mistral/latest/Q4_0",
+           in:          "Example.Com/Library/Mistral:Latest+Q4_0",
+           want:        "example.com/library/mistral/latest/Q4_0",
+           wantNoBuild: "example.com/library/mistral/latest",
        },
        {
-           in:   "example.com/library/mistral:latest",
-           want: "example.com/library/mistral/latest",
+           in:          "example.com/library/mistral:latest",
+           want:        "example.com/library/mistral/latest",
+           wantNoBuild: "example.com/library/mistral/latest",
        },
        {
-           in:   "",
-           want: "",
+           in:          "",
+           want:        "",
+           wantNoBuild: "",
        },
    }
    for _, tt := range cases {
```
```diff
@@ -513,6 +527,11 @@ func TestNameFromFilepath(t *testing.T) {
            if g != tt.want {
                t.Errorf("got = %q; want %q", g, tt.want)
            }
+           g = p.FilepathNoBuild()
+           g = filepath.ToSlash(g)
+           if g != tt.wantNoBuild {
+               t.Errorf("got = %q; want %q", g, tt.wantNoBuild)
+           }
        })
    }
 }
```