Compare commits

...

15 Commits

Author SHA1 Message Date
Michael Yang
fcfbb06f1b cmd: handle sigint globally
This change also updates both client.do and client.stream to return
ctx.Err(). Previously this error was skipped, so canceled contexts were
silently ignored.
2025-02-19 10:46:25 -08:00
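For context, a minimal sketch of the pattern this commit describes; the package and function names here are illustrative, not the actual client.do/client.stream code. Returning ctx.Err() after the read loop surfaces cancellation to the caller instead of dropping it.

```go
package client // illustrative package name, not the real one

import (
	"bufio"
	"context"
	"io"
)

// streamLines is a sketch, not the actual client.stream: it invokes fn for
// each line read from r. Returning ctx.Err() at the end (rather than nil)
// reports a canceled context to the caller instead of swallowing it.
func streamLines(ctx context.Context, r io.Reader, fn func([]byte) error) error {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		if err := fn(scanner.Bytes()); err != nil {
			return err
		}
	}
	if err := scanner.Err(); err != nil {
		return err
	}
	return ctx.Err() // nil while the context is live, context.Canceled after interrupt
}
```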
Michael Yang
e8d35d0de0 cmd: fix hide cursor
hides the cursor for the entire progress display rather than toggling it on each render cycle
2025-02-19 09:43:44 -08:00
Michael Yang
e13e7c8d94 Merge pull request #9079 from jeremyschlatter/main
cmd: fix flickering in progress bar
2025-02-18 22:59:29 +00:00
Jeremy Schlatter
78f403ff45 address code review comments 2025-02-18 14:50:09 -08:00
Michael Yang
08a299e1d0 cmake: avoid building intel backends on linux 2025-02-18 22:17:00 +00:00
Michael Yang
7b5d916a9a ci: set owner/group in tarball
set owner and group when building the linux tarball so extracted files
are consistent. this is the behaviour of release tarballs in version
0.5.7 and lower
2025-02-18 20:11:09 +00:00
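The CI diff further down this page does this with tar's --owner/--group flags. Purely as an illustration in Go, the same normalization with archive/tar looks roughly like the sketch below (the file name and content are made up):

```go
package main

import (
	"archive/tar"
	"os"
)

func main() {
	// Illustration: force owner and group to 0 in the tar header so the
	// archive's metadata does not depend on which CI user produced it.
	tw := tar.NewWriter(os.Stdout)
	defer tw.Close()

	data := []byte("hello\n")
	hdr := &tar.Header{
		Name:  "ollama/hello.txt", // hypothetical file
		Mode:  0o644,
		Size:  int64(len(data)),
		Uid:   0,
		Gid:   0,
		Uname: "",
		Gname: "",
	}
	if err := tw.WriteHeader(hdr); err != nil {
		panic(err)
	}
	if _, err := tw.Write(data); err != nil {
		panic(err)
	}
}
```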
benhaotang
33ad61b112 Add OpenDeepResearcher-via-searxng to Community Integrations (#9138) 2025-02-18 11:39:11 -08:00
L. Jiang
716e365615 test: add test cases for HumanNumber (#9108) 2025-02-18 11:35:26 -08:00
innightwolfsleep
3b4424ff98 readme: add LLM Telegram Bot to community integrations (#9150) 2025-02-18 10:04:30 -05:00
Jeremy Schlatter
f9c7ead160 cmd: eliminate flickering with synchronized output 2025-02-17 20:01:03 -08:00
Jeremy Schlatter
5930aaeb1a cmd: fix cursor flickering in progress bar
The previous commit fixed flickering in the progress bar itself. Cursor
flickering is harder to address.

Cursor flickering could be fixed by hiding the cursor altogether while
the progress bar is displayed. The downside of this is that if the
program is killed in such a way that it can't clean up its state, it
would leave the cursor invisible.

Instead, this commit introduces an output buffer. All of the escape
codes and content for a single progress update are written to a buffer,
which is then flushed to the terminal all at once. This significantly
decreases the time during which the terminal has seen the cursor-hiding
code but has not yet seen the cursor-showing code, thus minimizing (but
not 100% eliminating) cursor flickering.

For more context, see:
https://gitlab.gnome.org/GNOME/vte/-/issues/2837#note_2269501
2025-02-17 14:56:57 -08:00
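A minimal sketch of the buffering idea described above, assuming a bufio.Writer over stdout and standard ANSI escape sequences; this is not the actual progress package:

```go
package main

import (
	"bufio"
	"fmt"
	"os"
)

func main() {
	// Build one progress frame in a buffer, then flush it in a single write,
	// so the terminal sees hide-cursor and show-cursor almost back to back.
	w := bufio.NewWriter(os.Stdout)
	fmt.Fprint(w, "\033[?25l")            // hide cursor
	fmt.Fprint(w, "\033[1G")              // move to column 1
	fmt.Fprint(w, "pulling manifest 42%") // frame content
	fmt.Fprint(w, "\033[K")               // clear the rest of the line
	fmt.Fprint(w, "\033[?25h")            // show cursor again
	w.Flush()                             // one write, so the flicker window is minimal
	fmt.Println()
}
```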
Jeremy Schlatter
faf67db089 cmd: fix progress bar flickering
Previous code cleared the display before writing new content, creating a
window where the terminal could (and in some cases did) render empty lines.

Instead, we now write new content over the old content, only clearing
the trailing end of lines for cases where the new line is shorter.

Fixes #1664
2025-02-17 13:39:02 -08:00
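As a sketch of the overwrite-in-place approach (an illustrative helper, not the committed code): write the new content directly over the old line, then clear only the trailing remainder.

```go
package main

import (
	"fmt"
	"os"
	"time"
)

// redrawLine is an illustrative helper: move to column 1, write the new
// content over the old, and clear only what is left of the previous line
// with ESC[K, so there is no intermediate blank frame.
func redrawLine(next string) {
	fmt.Fprint(os.Stdout, "\033[1G", next, "\033[K")
}

func main() {
	redrawLine("downloading 10%")
	time.Sleep(200 * time.Millisecond)
	redrawLine("downloading 80%")
	fmt.Println()
}
```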
James-William-Kincaid-III
0667baddc6 docs: fix incorrect shortcut key in windows.md (#9098) 2025-02-15 15:38:24 -05:00
Bruce MacDonald
d006e1e09b model: document high-level model interface (#9122) 2025-02-14 16:01:00 -08:00
Daniel Hiltgen
df2680b4b9 Wire up system info log for new engine (#9123) 2025-02-14 15:55:33 -08:00
14 changed files with 124 additions and 69 deletions

View File

@@ -329,7 +329,9 @@ jobs:
done
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
- run: |
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
done
- uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}

View File

@@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
set(GGML_CPU_ALL_VARIANTS ON)
endif()

View File

@@ -381,6 +381,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints)
- [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI)
- [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models)
- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally)
### Cloud
@@ -548,6 +549,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow)
- [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (Telegram bot, primarily for RP; Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.)
### Supported backends

View File

@@ -126,7 +126,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return err
}
}
return nil
return ctx.Err()
}
const maxBufferSize = 512 * format.KiloByte
@@ -189,7 +190,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
}
}
return nil
return ctx.Err()
}
// GenerateResponseFunc is a function that [Client.Generate] invokes every time

View File

@@ -15,13 +15,11 @@ import (
"net"
"net/http"
"os"
"os/signal"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/console"
@@ -330,6 +328,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
if err := PullHandler(cmd, []string{name}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
}
return info, err
@@ -858,17 +857,6 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
spinner := progress.NewSpinner("")
p.Add("", spinner)
cancelCtx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
var latest api.ChatResponse
var fullResponse strings.Builder
@@ -903,10 +891,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
req.KeepAlive = opts.KeepAlive
}
if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
}
if err := client.Chat(cmd.Context(), req, fn); err != nil {
return nil, err
}
@@ -946,17 +931,6 @@ func generate(cmd *cobra.Command, opts runOptions) error {
generateContext = []int{}
}
ctx, cancel := context.WithCancel(cmd.Context())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
go func() {
<-sigChan
cancel()
}()
var state *displayResponseState = &displayResponseState{}
fn := func(response api.GenerateResponse) error {
@@ -992,10 +966,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
KeepAlive: opts.KeepAlive,
}
if err := client.Generate(ctx, &request, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil
}
if err := client.Generate(cmd.Context(), &request, fn); err != nil {
return err
}
@@ -1017,8 +988,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
latest.Summary()
}
ctx = context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context)
cmd.SetContext(ctx)
cmd.SetContext(context.WithValue(cmd.Context(), generateContextKey("context"), latest.Context))
return nil
}

View File

@@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell`
## Troubleshooting
Ollama on Windows stores files in a few different locations. You can view them in
the explorer window by hitting `<cmd>+R` and type in:
the explorer window by hitting `<Ctrl>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
- *app.log* contains the most recent logs from the GUI application
- *server.log* contains the most recent server logs

View File

@@ -12,6 +12,9 @@ func TestHumanNumber(t *testing.T) {
testCases := []testCase{
{0, "0"},
{999, "999"},
{1000, "1K"},
{1001, "1K"},
{1000000, "1M"},
{125000000, "125M"},
{500500000, "500.50M"},
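A formatter consistent with these cases might look like the sketch below; this is inferred from the tests shown here, not necessarily the actual format.HumanNumber implementation (it also omits the billions range not exercised above).

```go
package main

import (
	"fmt"
	"math"
)

// humanNumber is a sketch consistent with the test cases above; it is not
// claimed to be the exact implementation in the format package.
func humanNumber(n uint64) string {
	switch {
	case n >= 1_000_000:
		m := float64(n) / 1_000_000
		if m == math.Floor(m) {
			return fmt.Sprintf("%.0fM", m) // 125000000 -> "125M"
		}
		return fmt.Sprintf("%.2fM", m) // 500500000 -> "500.50M"
	case n >= 1_000:
		return fmt.Sprintf("%.0fK", float64(n)/1_000) // 1000 and 1001 -> "1K"
	default:
		return fmt.Sprintf("%d", n) // 0 -> "0", 999 -> "999"
	}
}

func main() {
	fmt.Println(humanNumber(500500000)) // 500.50M
}
```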

View File

@@ -305,6 +305,10 @@ func (b *testBackend) NewContext() ml.Context {
return &testContext{}
}
func (b *testBackend) SystemInfo() string {
return "not implemented"
}
type testContext struct{}
func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {

main.go
View File

@@ -2,6 +2,8 @@ package main
import (
"context"
"os"
"os/signal"
"github.com/spf13/cobra"
@@ -9,5 +11,15 @@ import (
)
func main() {
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt)
go func() {
<-sigChan
cancel()
}()
cobra.CheckErr(cmd.NewCLI().ExecuteContext(ctx))
}
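For comparison, the same cancellation wiring can be written with signal.NotifyContext from os/signal (Go 1.16+); a sketch, not the committed code:

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
)

func main() {
	// signal.NotifyContext bundles the channel-plus-goroutine pattern above:
	// the returned context is canceled on the first os.Interrupt (SIGINT).
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
	defer stop()

	<-ctx.Done() // in the CLI this context would be passed to ExecuteContext
	fmt.Println("interrupted:", ctx.Err())
}
```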

View File

@@ -23,6 +23,7 @@ type Backend interface {
Config() Config
Get(name string) Tensor
NewContext() Context
SystemInfo() string
}
var backends = make(map[string]func(*os.File) (Backend, error))

View File

@@ -1,11 +1,27 @@
package ggml
// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include
// #include <stdlib.h>
// #include <stdint.h>
// #include "ggml.h"
// #include "ggml-cpu.h"
// #include "ggml-backend.h"
/*
#cgo CPPFLAGS: -I${SRCDIR}/ggml/include
#include <stdlib.h>
#include <stdint.h>
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-backend.h"
static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);}
static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];}
typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER;
COMPILER inline get_compiler() {
#if defined(__clang__)
return COMP_CLANG;
#elif defined(__GNUC__)
return COMP_GCC;
#else
return COMP_UNKNOWN;
#endif
}
*/
import "C"
import (
@@ -626,3 +642,34 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)),
}
}
func (b *Backend) SystemInfo() string {
var compiler string
switch C.get_compiler() {
case C.COMP_UNKNOWN:
compiler = "cgo(unknown_compiler)"
case C.COMP_GCC:
compiler = "cgo(gcc)"
case C.COMP_CLANG:
compiler = "cgo(clang)"
}
var s string
for i := range C.ggml_backend_reg_count() {
reg := C.ggml_backend_reg_get(i)
fName := C.CString("ggml_backend_get_features")
defer C.free(unsafe.Pointer(fName))
get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName)
if get_features_fn != nil {
s += C.GoString(C.ggml_backend_reg_name(reg))
s += " : "
for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) {
s += C.GoString(features.name)
s += " = "
s += C.GoString(features.value)
s += " | "
}
}
}
return s + compiler
}

View File

@@ -21,6 +21,7 @@ import (
_ "github.com/ollama/ollama/ml/backend"
)
// Options contains the inputs for a model forward pass
type Options struct {
Inputs []int32
Positions []int32
@@ -34,11 +35,13 @@ type config struct {
Cache kvcache.Cache
}
// Base implements the common fields and methods for all models
type Base struct {
b ml.Backend
config
}
// Backend returns the underlying backend that will run the model
func (m *Base) Backend() ml.Backend {
return m.b
}
@@ -47,6 +50,7 @@ func (m *Base) Config() config {
return m.config
}
// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
type Model interface {
Forward(ml.Context, Options) (ml.Tensor, error)
@@ -56,6 +60,7 @@ type Model interface {
var models = make(map[string]func(ml.Config) (Model, error))
// Register registers a model constructor for the given architecture
func Register(name string, f func(ml.Config) (Model, error)) {
if _, ok := models[name]; ok {
panic("model: model already registered")
@@ -64,8 +69,9 @@ func Register(name string, f func(ml.Config) (Model, error)) {
models[name] = f
}
func New(s string) (Model, error) {
r, err := os.Open(s)
// New initializes a new model instance with the provided configuration based on the metadata in the model file
func New(modelPath string) (Model, error) {
r, err := os.Open(modelPath)
if err != nil {
return nil, err
}

View File

@@ -1,6 +1,7 @@
package progress
import (
"bufio"
"fmt"
"io"
"sync"
@@ -13,7 +14,8 @@ type State interface {
type Progress struct {
mu sync.Mutex
w io.Writer
// buffer output to minimize flickering on all terminals
w *bufio.Writer
pos int
@@ -22,7 +24,7 @@ type Progress struct {
}
func NewProgress(w io.Writer) *Progress {
p := &Progress{w: w}
p := &Progress{w: bufio.NewWriter(w)}
go p.start()
return p
}
@@ -47,26 +49,29 @@ func (p *Progress) stop() bool {
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
fmt.Fprintln(p.w)
}
// show cursor
fmt.Fprint(p.w, "\033[?25h")
p.w.Flush()
return stopped
}
func (p *Progress) StopAndClear() bool {
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := range p.pos {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
for range p.pos - 1 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K", "\033[1G")
}
// show cursor
fmt.Fprint(p.w, "\033[?25h")
p.w.Flush()
return stopped
}
@@ -81,30 +86,31 @@ func (p *Progress) render() {
p.mu.Lock()
defer p.mu.Unlock()
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
fmt.Fprint(p.w, "\033[?2026h")
defer fmt.Fprint(p.w, "\033[?2026l")
// clear already rendered progress lines
for i := range p.pos {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
for range p.pos - 1 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[1G")
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String())
fmt.Fprint(p.w, state.String(), "\033[K")
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}
}
p.pos = len(p.states)
p.w.Flush()
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
// hide cursor
fmt.Fprint(p.w, "\033[?25l")
for range p.ticker.C {
p.render()
}
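The ESC[?2026h / ESC[?2026l pair in render() above is DEC private mode 2026 ("synchronized output"). A minimal standalone sketch of bracketing a frame with it, illustrative rather than the progress package itself:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// Terminals that support mode 2026 hold rendering between the begin and
	// end sequences, so the cleared-and-redrawn lines appear atomically.
	// Terminals that do not support it simply ignore the sequences.
	fmt.Fprint(os.Stdout, "\033[?2026h")                                // begin synchronized update
	fmt.Fprint(os.Stdout, "\033[1G", "pulling layer 3/7 42%", "\033[K") // redraw the line in place
	fmt.Fprint(os.Stdout, "\033[?2026l")                                // end synchronized update
	fmt.Println()
}
```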

View File

@@ -813,6 +813,8 @@ func (s *Server) loadModel(
panic(err)
}
slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */)
// TODO(jessegross): LoRA loading
if lpath.String() != "" {
panic("loras are not yet implemented")
@@ -881,7 +883,6 @@ func Execute(args []string) error {
})
slog.SetDefault(slog.New(handler))
slog.Info("starting ollama engine")
// TODO(jessegross): Some system info would be useful
server := &Server{
batchSize: *batchSize,