Compare commits
24 Commits
parth/decr
...
mxyng/toke
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f9edb46fc | ||
|
|
03bf241c33 | ||
|
|
a887406c24 | ||
|
|
d51e95ba7e | ||
|
|
3d01f2aa34 | ||
|
|
634c416645 | ||
|
|
57de86cc61 | ||
|
|
12719b6e87 | ||
|
|
a077d996e3 | ||
|
|
c23d5095de | ||
|
|
7601f0e93e | ||
|
|
aad3f03890 | ||
|
|
55d0b6e8b9 | ||
|
|
38eac40d56 | ||
|
|
80f3f1bc25 | ||
|
|
b1a0db547b | ||
|
|
75d7b5f926 | ||
|
|
349d814814 | ||
|
|
c8743031e0 | ||
|
|
4adb9cf4bb | ||
|
|
74f475e735 | ||
|
|
875cecba74 | ||
|
|
7d411a4686 | ||
|
|
02a2401596 |
@@ -190,7 +190,7 @@ if(MLX_ENGINE)
|
|||||||
install(TARGETS mlx mlxc
|
install(TARGETS mlx mlxc
|
||||||
RUNTIME_DEPENDENCIES
|
RUNTIME_DEPENDENCIES
|
||||||
DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
|
DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
|
||||||
PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc cudnn nccl
|
PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc nvrtc-builtins cudnn nccl openblas gfortran
|
||||||
PRE_EXCLUDE_REGEXES ".*"
|
PRE_EXCLUDE_REGEXES ".*"
|
||||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||||
|
|||||||
18
Dockerfile
@@ -32,7 +32,7 @@ ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
|
|||||||
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
|
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
|
||||||
# install epel-release for ccache
|
# install epel-release for ccache
|
||||||
RUN yum install -y yum-utils epel-release \
|
RUN yum install -y yum-utils epel-release \
|
||||||
&& dnf install -y clang ccache \
|
&& dnf install -y clang ccache git \
|
||||||
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
|
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
|
||||||
ENV CC=clang CXX=clang++
|
ENV CC=clang CXX=clang++
|
||||||
|
|
||||||
@@ -149,6 +149,7 @@ COPY CMakeLists.txt CMakePresets.json .
|
|||||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||||
COPY x/ml/backend/mlx x/ml/backend/mlx
|
COPY x/ml/backend/mlx x/ml/backend/mlx
|
||||||
COPY go.mod go.sum .
|
COPY go.mod go.sum .
|
||||||
|
COPY MLX_VERSION .
|
||||||
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
|
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
|
||||||
ENV PATH=/usr/local/go/bin:$PATH
|
ENV PATH=/usr/local/go/bin:$PATH
|
||||||
RUN go mod download
|
RUN go mod download
|
||||||
@@ -156,14 +157,6 @@ RUN --mount=type=cache,target=/root/.ccache \
|
|||||||
cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
|
cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
|
||||||
&& cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
|
&& cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
|
||||||
&& cmake --install build --component MLX --strip --parallel ${PARALLEL}
|
&& cmake --install build --component MLX --strip --parallel ${PARALLEL}
|
||||||
COPY . .
|
|
||||||
ARG GOFLAGS="'-ldflags=-w -s'"
|
|
||||||
ENV CGO_ENABLED=1
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
ARG CGO_CXXFLAGS
|
|
||||||
RUN mkdir -p dist/bin
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/go-build \
|
|
||||||
go build -tags mlx -trimpath -buildmode=pie -o dist/bin/ollama-mlx .
|
|
||||||
|
|
||||||
FROM base AS build
|
FROM base AS build
|
||||||
WORKDIR /go/src/github.com/ollama/ollama
|
WORKDIR /go/src/github.com/ollama/ollama
|
||||||
@@ -172,12 +165,14 @@ RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-
|
|||||||
ENV PATH=/usr/local/go/bin:$PATH
|
ENV PATH=/usr/local/go/bin:$PATH
|
||||||
RUN go mod download
|
RUN go mod download
|
||||||
COPY . .
|
COPY . .
|
||||||
|
# Clone mlx-c headers for CGO (version from MLX_VERSION file)
|
||||||
|
RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
|
||||||
ARG GOFLAGS="'-ldflags=-w -s'"
|
ARG GOFLAGS="'-ldflags=-w -s'"
|
||||||
ENV CGO_ENABLED=1
|
ENV CGO_ENABLED=1
|
||||||
ARG CGO_CFLAGS
|
ENV CGO_CFLAGS="-I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
|
||||||
ARG CGO_CXXFLAGS
|
ARG CGO_CXXFLAGS
|
||||||
RUN --mount=type=cache,target=/root/.cache/go-build \
|
RUN --mount=type=cache,target=/root/.cache/go-build \
|
||||||
go build -trimpath -buildmode=pie -o /bin/ollama .
|
go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .
|
||||||
|
|
||||||
FROM --platform=linux/amd64 scratch AS amd64
|
FROM --platform=linux/amd64 scratch AS amd64
|
||||||
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
||||||
@@ -185,7 +180,6 @@ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
|
|||||||
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
|
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
|
||||||
COPY --from=vulkan dist/lib/ollama /lib/ollama/
|
COPY --from=vulkan dist/lib/ollama /lib/ollama/
|
||||||
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
|
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
|
||||||
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/bin/ /bin/
|
|
||||||
|
|
||||||
FROM --platform=linux/arm64 scratch AS arm64
|
FROM --platform=linux/arm64 scratch AS arm64
|
||||||
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
|
||||||
|
|||||||
1
MLX_VERSION
Normal file
@@ -0,0 +1 @@
|
|||||||
|
v0.4.1
|
||||||
37
README.md
@@ -48,7 +48,7 @@ ollama run gemma3
|
|||||||
|
|
||||||
## Model library
|
## Model library
|
||||||
|
|
||||||
Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library')
|
Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library "ollama model library")
|
||||||
|
|
||||||
Here are some example models that can be downloaded:
|
Here are some example models that can be downloaded:
|
||||||
|
|
||||||
@@ -260,6 +260,38 @@ Finally, in a separate shell, run a model:
|
|||||||
./ollama run llama3.2
|
./ollama run llama3.2
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Building with MLX (experimental)
|
||||||
|
|
||||||
|
First build the MLX libraries:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cmake --preset MLX
|
||||||
|
cmake --build --preset MLX --parallel
|
||||||
|
cmake --install build --component MLX
|
||||||
|
```
|
||||||
|
|
||||||
|
When building with the `-tags mlx` flag, the main `ollama` binary includes MLX support for experimental features like image generation:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
go build -tags mlx .
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, start the server:
|
||||||
|
|
||||||
|
```
|
||||||
|
./ollama serve
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building MLX with CUDA
|
||||||
|
|
||||||
|
When building with CUDA, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with default architectures:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cmake --preset 'MLX CUDA 13'
|
||||||
|
cmake --build --preset 'MLX CUDA 13' --parallel
|
||||||
|
cmake --install build --component MLX
|
||||||
|
```
|
||||||
|
|
||||||
## REST API
|
## REST API
|
||||||
|
|
||||||
Ollama has a REST API for running and managing models.
|
Ollama has a REST API for running and managing models.
|
||||||
@@ -290,6 +322,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
|
|
||||||
### Web & Desktop
|
### Web & Desktop
|
||||||
|
|
||||||
|
- [Onyx](https://github.com/onyx-dot-app/onyx)
|
||||||
- [Open WebUI](https://github.com/open-webui/open-webui)
|
- [Open WebUI](https://github.com/open-webui/open-webui)
|
||||||
- [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat)
|
- [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat)
|
||||||
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
|
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
|
||||||
@@ -636,6 +669,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.
|
- [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.
|
||||||
|
|
||||||
### Observability
|
### Observability
|
||||||
|
|
||||||
- [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration to Ollama.
|
- [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration to Ollama.
|
||||||
- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
|
- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
|
||||||
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
|
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
|
||||||
@@ -644,4 +678,5 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.
|
- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.
|
||||||
|
|
||||||
### Security
|
### Security
|
||||||
|
|
||||||
- [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server)
|
- [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server)
|
||||||
|
|||||||
28
api/types.go
@@ -127,6 +127,20 @@ type GenerateRequest struct {
|
|||||||
// each with an associated log probability. Only applies when Logprobs is true.
|
// each with an associated log probability. Only applies when Logprobs is true.
|
||||||
// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
|
// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
|
||||||
TopLogprobs int `json:"top_logprobs,omitempty"`
|
TopLogprobs int `json:"top_logprobs,omitempty"`
|
||||||
|
|
||||||
|
// Experimental: Image generation fields (may change or be removed)
|
||||||
|
|
||||||
|
// Width is the width of the generated image in pixels.
|
||||||
|
// Only used for image generation models.
|
||||||
|
Width int32 `json:"width,omitempty"`
|
||||||
|
|
||||||
|
// Height is the height of the generated image in pixels.
|
||||||
|
// Only used for image generation models.
|
||||||
|
Height int32 `json:"height,omitempty"`
|
||||||
|
|
||||||
|
// Steps is the number of diffusion steps for image generation.
|
||||||
|
// Only used for image generation models.
|
||||||
|
Steps int32 `json:"steps,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ChatRequest describes a request sent by [Client.Chat].
|
// ChatRequest describes a request sent by [Client.Chat].
|
||||||
@@ -860,6 +874,20 @@ type GenerateResponse struct {
|
|||||||
// Logprobs contains log probability information for the generated tokens,
|
// Logprobs contains log probability information for the generated tokens,
|
||||||
// if requested via the Logprobs parameter.
|
// if requested via the Logprobs parameter.
|
||||||
Logprobs []Logprob `json:"logprobs,omitempty"`
|
Logprobs []Logprob `json:"logprobs,omitempty"`
|
||||||
|
|
||||||
|
// Experimental: Image generation fields (may change or be removed)
|
||||||
|
|
||||||
|
// Image contains a base64-encoded generated image.
|
||||||
|
// Only present for image generation models.
|
||||||
|
Image string `json:"image,omitempty"`
|
||||||
|
|
||||||
|
// Completed is the number of completed steps in image generation.
|
||||||
|
// Only present for image generation models during streaming.
|
||||||
|
Completed int64 `json:"completed,omitempty"`
|
||||||
|
|
||||||
|
// Total is the total number of steps for image generation.
|
||||||
|
// Only present for image generation models during streaming.
|
||||||
|
Total int64 `json:"total,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ModelDetails provides details about a model.
|
// ModelDetails provides details about a model.
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ extern NSString *SystemWidePath;
|
|||||||
@interface AppDelegate () <NSWindowDelegate, WKNavigationDelegate, WKUIDelegate>
|
@interface AppDelegate () <NSWindowDelegate, WKNavigationDelegate, WKUIDelegate>
|
||||||
@property(strong, nonatomic) NSStatusItem *statusItem;
|
@property(strong, nonatomic) NSStatusItem *statusItem;
|
||||||
@property(assign, nonatomic) BOOL updateAvailable;
|
@property(assign, nonatomic) BOOL updateAvailable;
|
||||||
|
@property(assign, nonatomic) BOOL systemShutdownInProgress;
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@implementation AppDelegate
|
@implementation AppDelegate
|
||||||
@@ -40,6 +41,13 @@ bool firstTimeRun,startHidden; // Set in run before initialization
|
|||||||
}
|
}
|
||||||
|
|
||||||
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
|
- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
|
||||||
|
// Register for system shutdown/restart notification so we can allow termination
|
||||||
|
[[[NSWorkspace sharedWorkspace] notificationCenter]
|
||||||
|
addObserver:self
|
||||||
|
selector:@selector(systemWillPowerOff:)
|
||||||
|
name:NSWorkspaceWillPowerOffNotification
|
||||||
|
object:nil];
|
||||||
|
|
||||||
// if we're in development mode, set the app icon
|
// if we're in development mode, set the app icon
|
||||||
NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
|
NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
|
||||||
if (![bundlePath hasSuffix:@".app"]) {
|
if (![bundlePath hasSuffix:@".app"]) {
|
||||||
@@ -278,7 +286,18 @@ bool firstTimeRun,startHidden; // Set in run before initialization
|
|||||||
[NSApp activateIgnoringOtherApps:YES];
|
[NSApp activateIgnoringOtherApps:YES];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
- (void)systemWillPowerOff:(NSNotification *)notification {
|
||||||
|
// Set flag so applicationShouldTerminate: knows to allow termination.
|
||||||
|
// The system will call applicationShouldTerminate: after posting this notification.
|
||||||
|
self.systemShutdownInProgress = YES;
|
||||||
|
}
|
||||||
|
|
||||||
- (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender {
|
- (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender {
|
||||||
|
// Allow termination if the system is shutting down or restarting
|
||||||
|
if (self.systemShutdownInProgress) {
|
||||||
|
return NSTerminateNow;
|
||||||
|
}
|
||||||
|
// Otherwise just hide the app (for Cmd+Q, close button, etc.)
|
||||||
[NSApp hide:nil];
|
[NSApp hide:nil];
|
||||||
[NSApp setActivationPolicy:NSApplicationActivationPolicyAccessory];
|
[NSApp setActivationPolicy:NSApplicationActivationPolicyAccessory];
|
||||||
return NSTerminateCancel;
|
return NSTerminateCancel;
|
||||||
|
|||||||
93
cmd/cmd.go
@@ -46,8 +46,9 @@ import (
|
|||||||
"github.com/ollama/ollama/types/syncmap"
|
"github.com/ollama/ollama/types/syncmap"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
xcmd "github.com/ollama/ollama/x/cmd"
|
xcmd "github.com/ollama/ollama/x/cmd"
|
||||||
|
"github.com/ollama/ollama/x/create"
|
||||||
|
xcreateclient "github.com/ollama/ollama/x/create/client"
|
||||||
"github.com/ollama/ollama/x/imagegen"
|
"github.com/ollama/ollama/x/imagegen"
|
||||||
imagegenclient "github.com/ollama/ollama/x/imagegen/client"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
|
const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
|
||||||
@@ -93,15 +94,87 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
p := progress.NewProgress(os.Stderr)
|
p := progress.NewProgress(os.Stderr)
|
||||||
defer p.Stop()
|
defer p.Stop()
|
||||||
|
|
||||||
|
// Validate model name early to fail fast
|
||||||
|
modelName := args[0]
|
||||||
|
name := model.ParseName(modelName)
|
||||||
|
if !name.IsValid() {
|
||||||
|
return fmt.Errorf("invalid model name: %s", modelName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for --experimental flag for safetensors model creation
|
||||||
|
experimental, _ := cmd.Flags().GetBool("experimental")
|
||||||
|
if experimental {
|
||||||
|
// Get Modelfile content - either from -f flag or default to "FROM ."
|
||||||
|
var reader io.Reader
|
||||||
|
filename, err := getModelfileName(cmd)
|
||||||
|
if os.IsNotExist(err) || filename == "" {
|
||||||
|
// No Modelfile specified or found - use default
|
||||||
|
reader = strings.NewReader("FROM .\n")
|
||||||
|
} else if err != nil {
|
||||||
|
return err
|
||||||
|
} else {
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
reader = f
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the Modelfile
|
||||||
|
modelfile, err := parser.ParseFile(reader)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to parse Modelfile: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract FROM path and configuration
|
||||||
|
var modelDir string
|
||||||
|
mfConfig := &xcreateclient.ModelfileConfig{}
|
||||||
|
|
||||||
|
for _, cmd := range modelfile.Commands {
|
||||||
|
switch cmd.Name {
|
||||||
|
case "model":
|
||||||
|
modelDir = cmd.Args
|
||||||
|
case "template":
|
||||||
|
mfConfig.Template = cmd.Args
|
||||||
|
case "system":
|
||||||
|
mfConfig.System = cmd.Args
|
||||||
|
case "license":
|
||||||
|
mfConfig.License = cmd.Args
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if modelDir == "" {
|
||||||
|
modelDir = "."
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve relative paths based on Modelfile location
|
||||||
|
if !filepath.IsAbs(modelDir) && filename != "" {
|
||||||
|
modelDir = filepath.Join(filepath.Dir(filename), modelDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
quantize, _ := cmd.Flags().GetString("quantize")
|
||||||
|
return xcreateclient.CreateModel(xcreateclient.CreateOptions{
|
||||||
|
ModelName: modelName,
|
||||||
|
ModelDir: modelDir,
|
||||||
|
Quantize: quantize,
|
||||||
|
Modelfile: mfConfig,
|
||||||
|
}, p)
|
||||||
|
}
|
||||||
|
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
|
|
||||||
filename, err := getModelfileName(cmd)
|
filename, err := getModelfileName(cmd)
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
if filename == "" {
|
if filename == "" {
|
||||||
// No Modelfile found - check if current directory is an image gen model
|
// No Modelfile found - check if current directory is an image gen model
|
||||||
if imagegen.IsTensorModelDir(".") {
|
if create.IsTensorModelDir(".") {
|
||||||
quantize, _ := cmd.Flags().GetString("quantize")
|
quantize, _ := cmd.Flags().GetString("quantize")
|
||||||
return imagegenclient.CreateModel(args[0], ".", quantize, p)
|
return xcreateclient.CreateModel(xcreateclient.CreateOptions{
|
||||||
|
ModelName: modelName,
|
||||||
|
ModelDir: ".",
|
||||||
|
Quantize: quantize,
|
||||||
|
}, p)
|
||||||
}
|
}
|
||||||
reader = strings.NewReader("FROM .\n")
|
reader = strings.NewReader("FROM .\n")
|
||||||
} else {
|
} else {
|
||||||
@@ -134,7 +207,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
spinner.Stop()
|
spinner.Stop()
|
||||||
|
|
||||||
req.Model = args[0]
|
req.Model = modelName
|
||||||
quantize, _ := cmd.Flags().GetString("quantize")
|
quantize, _ := cmd.Flags().GetString("quantize")
|
||||||
if quantize != "" {
|
if quantize != "" {
|
||||||
req.Quantize = quantize
|
req.Quantize = quantize
|
||||||
@@ -527,7 +600,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if this is an image generation model
|
// Check if this is an image generation model
|
||||||
if slices.Contains(info.Capabilities, model.CapabilityImageGeneration) {
|
if slices.Contains(info.Capabilities, model.CapabilityImage) {
|
||||||
if opts.Prompt == "" && !interactive {
|
if opts.Prompt == "" && !interactive {
|
||||||
return errors.New("image generation models require a prompt. Usage: ollama run " + name + " \"your prompt here\"")
|
return errors.New("image generation models require a prompt. Usage: ollama run " + name + " \"your prompt here\"")
|
||||||
}
|
}
|
||||||
@@ -1745,12 +1818,19 @@ func NewCLI() *cobra.Command {
|
|||||||
Use: "create MODEL",
|
Use: "create MODEL",
|
||||||
Short: "Create a model",
|
Short: "Create a model",
|
||||||
Args: cobra.ExactArgs(1),
|
Args: cobra.ExactArgs(1),
|
||||||
PreRunE: checkServerHeartbeat,
|
PreRunE: func(cmd *cobra.Command, args []string) error {
|
||||||
|
// Skip server check for experimental mode (writes directly to disk)
|
||||||
|
if experimental, _ := cmd.Flags().GetBool("experimental"); experimental {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return checkServerHeartbeat(cmd, args)
|
||||||
|
},
|
||||||
RunE: CreateHandler,
|
RunE: CreateHandler,
|
||||||
}
|
}
|
||||||
|
|
||||||
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
|
createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
|
||||||
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
|
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
|
||||||
|
createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation")
|
||||||
|
|
||||||
showCmd := &cobra.Command{
|
showCmd := &cobra.Command{
|
||||||
Use: "show MODEL",
|
Use: "show MODEL",
|
||||||
@@ -1905,6 +1985,7 @@ func NewCLI() *cobra.Command {
|
|||||||
} {
|
} {
|
||||||
switch cmd {
|
switch cmd {
|
||||||
case runCmd:
|
case runCmd:
|
||||||
|
imagegen.AppendFlagsDocs(cmd)
|
||||||
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
|
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
|
||||||
case serveCmd:
|
case serveCmd:
|
||||||
appendEnvDocs(cmd, []envconfig.EnvVar{
|
appendEnvDocs(cmd, []envconfig.EnvVar{
|
||||||
|
|||||||
@@ -1555,7 +1555,7 @@ func TestShowInfoImageGen(t *testing.T) {
|
|||||||
ParameterSize: "10.3B",
|
ParameterSize: "10.3B",
|
||||||
QuantizationLevel: "FP8",
|
QuantizationLevel: "FP8",
|
||||||
},
|
},
|
||||||
Capabilities: []model.Capability{model.CapabilityImageGeneration},
|
Capabilities: []model.Capability{model.CapabilityImage},
|
||||||
Requires: "0.14.0",
|
Requires: "0.14.0",
|
||||||
}, false, &b)
|
}, false, &b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
|||||||
Prompt: ">>> ",
|
Prompt: ">>> ",
|
||||||
AltPrompt: "... ",
|
AltPrompt: "... ",
|
||||||
Placeholder: "Send a message (/? for help)",
|
Placeholder: "Send a message (/? for help)",
|
||||||
AltPlaceholder: `Use """ to end multi-line input`,
|
AltPlaceholder: "Press Enter to send",
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
62
docs/api.md
@@ -16,6 +16,7 @@
|
|||||||
- [Generate Embeddings](#generate-embeddings)
|
- [Generate Embeddings](#generate-embeddings)
|
||||||
- [List Running Models](#list-running-models)
|
- [List Running Models](#list-running-models)
|
||||||
- [Version](#version)
|
- [Version](#version)
|
||||||
|
- [Experimental: Image Generation](#image-generation-experimental)
|
||||||
|
|
||||||
## Conventions
|
## Conventions
|
||||||
|
|
||||||
@@ -58,6 +59,15 @@ Advanced parameters (optional):
|
|||||||
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
|
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
|
||||||
- `context` (deprecated): the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
- `context` (deprecated): the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
|
||||||
|
|
||||||
|
Experimental image generation parameters (for image generation models only):
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> These parameters are experimental and may change in future versions.
|
||||||
|
|
||||||
|
- `width`: width of the generated image in pixels
|
||||||
|
- `height`: height of the generated image in pixels
|
||||||
|
- `steps`: number of diffusion steps
|
||||||
|
|
||||||
#### Structured outputs
|
#### Structured outputs
|
||||||
|
|
||||||
Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [structured outputs](#request-structured-outputs) example below.
|
Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [structured outputs](#request-structured-outputs) example below.
|
||||||
@@ -1867,3 +1877,55 @@ curl http://localhost:11434/api/version
|
|||||||
"version": "0.5.1"
|
"version": "0.5.1"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Experimental Features
|
||||||
|
|
||||||
|
### Image Generation (Experimental)
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> Image generation is experimental and may change in future versions.
|
||||||
|
|
||||||
|
Image generation is now supported through the standard `/api/generate` endpoint when using image generation models. The API automatically detects when an image generation model is being used.
|
||||||
|
|
||||||
|
See the [Generate a completion](#generate-a-completion) section for the full API documentation. The experimental image generation parameters (`width`, `height`, `steps`) are documented there.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
##### Request
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:11434/api/generate -d '{
|
||||||
|
"model": "x/z-image-turbo",
|
||||||
|
"prompt": "a sunset over mountains",
|
||||||
|
"width": 1024,
|
||||||
|
"height": 768
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Response (streaming)
|
||||||
|
|
||||||
|
Progress updates during generation:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "x/z-image-turbo",
|
||||||
|
"created_at": "2024-01-15T10:30:00.000000Z",
|
||||||
|
"completed": 5,
|
||||||
|
"total": 20,
|
||||||
|
"done": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Final Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "x/z-image-turbo",
|
||||||
|
"created_at": "2024-01-15T10:30:15.000000Z",
|
||||||
|
"image": "iVBORw0KGgoAAAANSUhEUg...",
|
||||||
|
"done": true,
|
||||||
|
"done_reason": "stop",
|
||||||
|
"total_duration": 15000000000,
|
||||||
|
"load_duration": 2000000000
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ ollama pull glm-4.7:cloud
|
|||||||
To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
|
To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
export ANTHROPIC_AUTH_TOKEN=ollama # required but ignored
|
||||||
export ANTHROPIC_BASE_URL=http://localhost:11434
|
export ANTHROPIC_BASE_URL=http://localhost:11434
|
||||||
export ANTHROPIC_API_KEY=ollama # required but ignored
|
export ANTHROPIC_API_KEY=ollama # required but ignored
|
||||||
```
|
```
|
||||||
@@ -247,12 +248,13 @@ curl -X POST http://localhost:11434/v1/messages \
|
|||||||
[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:
|
[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
|
ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
|
||||||
```
|
```
|
||||||
|
|
||||||
Or set the environment variables in your shell profile:
|
Or set the environment variables in your shell profile:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
export ANTHROPIC_AUTH_TOKEN=ollama
|
||||||
export ANTHROPIC_BASE_URL=http://localhost:11434
|
export ANTHROPIC_BASE_URL=http://localhost:11434
|
||||||
export ANTHROPIC_API_KEY=ollama
|
export ANTHROPIC_API_KEY=ollama
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -275,6 +275,73 @@ curl -X POST http://localhost:11434/v1/chat/completions \
|
|||||||
- [x] `dimensions`
|
- [x] `dimensions`
|
||||||
- [ ] `user`
|
- [ ] `user`
|
||||||
|
|
||||||
|
### `/v1/images/generations` (experimental)
|
||||||
|
|
||||||
|
> Note: This endpoint is experimental and may change or be removed in future versions.
|
||||||
|
|
||||||
|
Generate images using image generation models.
|
||||||
|
|
||||||
|
<CodeGroup dropdown>
|
||||||
|
|
||||||
|
```python images.py
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
base_url='http://localhost:11434/v1/',
|
||||||
|
api_key='ollama', # required but ignored
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.images.generate(
|
||||||
|
model='x/z-image-turbo',
|
||||||
|
prompt='A cute robot learning to paint',
|
||||||
|
size='1024x1024',
|
||||||
|
response_format='b64_json',
|
||||||
|
)
|
||||||
|
print(response.data[0].b64_json[:50] + '...')
|
||||||
|
```
|
||||||
|
|
||||||
|
```javascript images.js
|
||||||
|
import OpenAI from "openai";
|
||||||
|
|
||||||
|
const openai = new OpenAI({
|
||||||
|
baseURL: "http://localhost:11434/v1/",
|
||||||
|
apiKey: "ollama", // required but ignored
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await openai.images.generate({
|
||||||
|
model: "x/z-image-turbo",
|
||||||
|
prompt: "A cute robot learning to paint",
|
||||||
|
size: "1024x1024",
|
||||||
|
response_format: "b64_json",
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(response.data[0].b64_json.slice(0, 50) + "...");
|
||||||
|
```
|
||||||
|
|
||||||
|
```shell images.sh
|
||||||
|
curl -X POST http://localhost:11434/v1/images/generations \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "x/z-image-turbo",
|
||||||
|
"prompt": "A cute robot learning to paint",
|
||||||
|
"size": "1024x1024",
|
||||||
|
"response_format": "b64_json"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</CodeGroup>
|
||||||
|
|
||||||
|
#### Supported request fields
|
||||||
|
|
||||||
|
- [x] `model`
|
||||||
|
- [x] `prompt`
|
||||||
|
- [x] `size` (e.g. "1024x1024")
|
||||||
|
- [x] `response_format` (only `b64_json` supported)
|
||||||
|
- [ ] `n`
|
||||||
|
- [ ] `quality`
|
||||||
|
- [ ] `style`
|
||||||
|
- [ ] `user`
|
||||||
|
|
||||||
### `/v1/responses`
|
### `/v1/responses`
|
||||||
|
|
||||||
> Note: Added in Ollama v0.13.3
|
> Note: Added in Ollama v0.13.3
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ More Ollama [Python example](https://github.com/ollama/ollama-python/blob/main/e
|
|||||||
import { Ollama } from "ollama";
|
import { Ollama } from "ollama";
|
||||||
|
|
||||||
const client = new Ollama();
|
const client = new Ollama();
|
||||||
const results = await client.webSearch({ query: "what is ollama?" });
|
const results = await client.webSearch("what is ollama?");
|
||||||
console.log(JSON.stringify(results, null, 2));
|
console.log(JSON.stringify(results, null, 2));
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -213,7 +213,7 @@ models](https://ollama.com/models)\n\nAvailable for macOS, Windows, and Linux',
|
|||||||
import { Ollama } from "ollama";
|
import { Ollama } from "ollama";
|
||||||
|
|
||||||
const client = new Ollama();
|
const client = new Ollama();
|
||||||
const fetchResult = await client.webFetch({ url: "https://ollama.com" });
|
const fetchResult = await client.webFetch("https://ollama.com");
|
||||||
console.log(JSON.stringify(fetchResult, null, 2));
|
console.log(JSON.stringify(fetchResult, null, 2));
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -111,7 +111,9 @@
|
|||||||
"/integrations/zed",
|
"/integrations/zed",
|
||||||
"/integrations/roo-code",
|
"/integrations/roo-code",
|
||||||
"/integrations/n8n",
|
"/integrations/n8n",
|
||||||
"/integrations/xcode"
|
"/integrations/xcode",
|
||||||
|
"/integrations/onyx",
|
||||||
|
"/integrations/marimo"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ Please refer to the [GPU docs](./gpu).
|
|||||||
|
|
||||||
## How can I specify the context window size?
|
## How can I specify the context window size?
|
||||||
|
|
||||||
By default, Ollama uses a context window size of 2048 tokens.
|
By default, Ollama uses a context window size of 4096 tokens.
|
||||||
|
|
||||||
This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
|
This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
|
||||||
|
|
||||||
|
|||||||
BIN
docs/images/marimo-add-model.png
Normal file
|
After Width: | Height: | Size: 174 KiB |
BIN
docs/images/marimo-chat.png
Normal file
|
After Width: | Height: | Size: 80 KiB |
BIN
docs/images/marimo-code-completion.png
Normal file
|
After Width: | Height: | Size: 230 KiB |
BIN
docs/images/marimo-models.png
Normal file
|
After Width: | Height: | Size: 178 KiB |
BIN
docs/images/marimo-settings.png
Normal file
|
After Width: | Height: | Size: 186 KiB |
BIN
docs/images/onyx-login.png
Normal file
|
After Width: | Height: | Size: 100 KiB |
BIN
docs/images/onyx-ollama-form.png
Normal file
|
After Width: | Height: | Size: 306 KiB |
BIN
docs/images/onyx-ollama-llm.png
Normal file
|
After Width: | Height: | Size: 300 KiB |
BIN
docs/images/onyx-query.png
Normal file
|
After Width: | Height: | Size: 211 KiB |
@@ -2,6 +2,12 @@
|
|||||||
title: Claude Code
|
title: Claude Code
|
||||||
---
|
---
|
||||||
|
|
||||||
|
Claude Code is Anthropic's agentic coding tool that can read, modify, and execute code in your working directory.
|
||||||
|
|
||||||
|
Open models can be used with Claude Code through Ollama's Anthropic-compatible API, enabling you to use models such as `qwen3-coder`, `gpt-oss:20b`, or other models.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
Install [Claude Code](https://code.claude.com/docs/en/overview):
|
Install [Claude Code](https://code.claude.com/docs/en/overview):
|
||||||
@@ -25,22 +31,24 @@ Claude Code connects to Ollama using the Anthropic-compatible API.
|
|||||||
1. Set the environment variables:
|
1. Set the environment variables:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
export ANTHROPIC_AUTH_TOKEN=ollama
|
||||||
export ANTHROPIC_BASE_URL=http://localhost:11434
|
export ANTHROPIC_BASE_URL=http://localhost:11434
|
||||||
export ANTHROPIC_API_KEY=ollama
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Run Claude Code with an Ollama model:
|
2. Run Claude Code with an Ollama model:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
claude --model qwen3-coder
|
claude --model gpt-oss:20b
|
||||||
```
|
```
|
||||||
|
|
||||||
Or run with environment variables inline:
|
Or run with environment variables inline:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
|
ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 claude --model gpt-oss:20b
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Note:** Claude Code requires a large context window. We recommend at least 32K tokens. See the [context length documentation](/context-length) for how to adjust context length in Ollama.
|
||||||
|
|
||||||
## Connecting to ollama.com
|
## Connecting to ollama.com
|
||||||
|
|
||||||
1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
|
1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
|
||||||
@@ -67,3 +75,4 @@ claude --model glm-4.7:cloud
|
|||||||
### Local models
|
### Local models
|
||||||
- `qwen3-coder` - Excellent for coding tasks
|
- `qwen3-coder` - Excellent for coding tasks
|
||||||
- `gpt-oss:20b` - Strong general-purpose model
|
- `gpt-oss:20b` - Strong general-purpose model
|
||||||
|
- `gpt-oss:120b` - Larger general-purpose model for more complex tasks
|
||||||
73
docs/integrations/marimo.mdx
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
---
|
||||||
|
title: marimo
|
||||||
|
---
|
||||||
|
|
||||||
|
## Install
|
||||||
|
|
||||||
|
Install [marimo](https://marimo.io). You can use `pip` or `uv` for this. You
|
||||||
|
can also use `uv` to create a sandboxed environment for marimo by running:
|
||||||
|
|
||||||
|
```
|
||||||
|
uvx marimo edit --sandbox notebook.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage with Ollama
|
||||||
|
|
||||||
|
1. In marimo, go to the user settings and go to the AI tab. From here
|
||||||
|
you can find and configure Ollama as an AI provider. For local use you
|
||||||
|
would typically point the base url to `http://localhost:11434/v1`.
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/marimo-settings.png"
|
||||||
|
alt="Ollama settings in marimo"
|
||||||
|
width="50%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
2. Once the AI provider is set up, you can turn on/off specific AI models you'd like to access.
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/marimo-models.png"
|
||||||
|
alt="Selecting an Ollama model"
|
||||||
|
width="50%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
3. You can also add a model to the list of available models by scrolling to the bottom and using the UI there.
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/marimo-add-model.png"
|
||||||
|
alt="Adding a new Ollama model"
|
||||||
|
width="50%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
4. Once configured, you can now use Ollama for AI chats in marimo.
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/marimo-chat.png"
|
||||||
|
alt="Configure code completion"
|
||||||
|
width="50%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
4. Alternatively, you can now use Ollama for **inline code completion** in marimo. This can be configured in the "AI Features" tab.
|
||||||
|
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/marimo-code-completion.png"
|
||||||
|
alt="Configure code completion"
|
||||||
|
width="50%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
## Connecting to ollama.com
|
||||||
|
|
||||||
|
1. Sign in to ollama cloud via `ollama signin`
|
||||||
|
2. In the ollama model settings add a model that ollama hosts, like `gpt-oss:120b`.
|
||||||
|
3. You can now refer to this model in marimo!
|
||||||
63
docs/integrations/onyx.mdx
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
|
||||||
|
title: Onyx
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
[Onyx](http://onyx.app/) is a self-hostable Chat UI that integrates with all Ollama models. Features include:
|
||||||
|
- Creating custom Agents
|
||||||
|
- Web search
|
||||||
|
- Deep Research
|
||||||
|
- RAG over uploaded documents and connected apps
|
||||||
|
- Connectors to applications like Google Drive, Email, Slack, etc.
|
||||||
|
- MCP and OpenAPI Actions support
|
||||||
|
- Image generation
|
||||||
|
- User/Groups management, RBAC, SSO, etc.
|
||||||
|
|
||||||
|
Onyx can be deployed for single users or large organizations.
|
||||||
|
|
||||||
|
## Install Onyx
|
||||||
|
|
||||||
|
Deploy Onyx with the [quickstart guide](https://docs.onyx.app/deployment/getting_started/quickstart).
|
||||||
|
|
||||||
|
<Info>
|
||||||
|
Resourcing/scaling docs [here](https://docs.onyx.app/deployment/getting_started/resourcing).
|
||||||
|
</Info>
|
||||||
|
|
||||||
|
## Usage with Ollama
|
||||||
|
|
||||||
|
1. Login to your Onyx deployment (create an account first).
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/onyx-login.png"
|
||||||
|
alt="Onyx Login Page"
|
||||||
|
width="75%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
2. In the set-up process select `Ollama` as the LLM provider.
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/onyx-ollama-llm.png"
|
||||||
|
alt="Onyx Set Up Form"
|
||||||
|
width="75%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
3. Provide your **Ollama API URL** and select your models.
|
||||||
|
<Note>If you're running Onyx in Docker, to access your computer's local network use `http://host.docker.internal` instead of `http://127.0.0.1`.</Note>
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/onyx-ollama-form.png"
|
||||||
|
alt="Selecting Ollama Models"
|
||||||
|
width="75%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
You can also easily connect up Onyx Cloud with the `Ollama Cloud` tab of the setup.
|
||||||
|
|
||||||
|
## Send your first query
|
||||||
|
<div style={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<img
|
||||||
|
src="/images/onyx-query.png"
|
||||||
|
alt="Onyx Query Example"
|
||||||
|
width="75%"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
title: "Linux"
|
title: Linux
|
||||||
---
|
---
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
@@ -13,14 +13,15 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||||||
## Manual install
|
## Manual install
|
||||||
|
|
||||||
<Note>
|
<Note>
|
||||||
If you are upgrading from a prior version, you should remove the old libraries with `sudo rm -rf /usr/lib/ollama` first.
|
If you are upgrading from a prior version, you should remove the old libraries
|
||||||
|
with `sudo rm -rf /usr/lib/ollama` first.
|
||||||
</Note>
|
</Note>
|
||||||
|
|
||||||
Download and extract the package:
|
Download and extract the package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
Start Ollama:
|
Start Ollama:
|
||||||
@@ -40,8 +41,8 @@ ollama -v
|
|||||||
If you have an AMD GPU, also download and extract the additional ROCm package:
|
If you have an AMD GPU, also download and extract the additional ROCm package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
### ARM64 install
|
### ARM64 install
|
||||||
@@ -49,8 +50,8 @@ curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
|
|||||||
Download and extract the ARM64-specific package:
|
Download and extract the ARM64-specific package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-arm64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-arm64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
### Adding Ollama as a startup service (recommended)
|
### Adding Ollama as a startup service (recommended)
|
||||||
@@ -112,7 +113,11 @@ sudo systemctl status ollama
|
|||||||
```
|
```
|
||||||
|
|
||||||
<Note>
|
<Note>
|
||||||
While AMD has contributed the `amdgpu` driver upstream to the official linux kernel source, the version is older and may not support all ROCm features. We recommend you install the latest driver from https://www.amd.com/en/support/linux-drivers for best support of your Radeon GPU.
|
While AMD has contributed the `amdgpu` driver upstream to the official linux
|
||||||
|
kernel source, the version is older and may not support all ROCm features. We
|
||||||
|
recommend you install the latest driver from
|
||||||
|
https://www.amd.com/en/support/linux-drivers for best support of your Radeon
|
||||||
|
GPU.
|
||||||
</Note>
|
</Note>
|
||||||
|
|
||||||
## Customizing
|
## Customizing
|
||||||
@@ -141,8 +146,8 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||||||
Or by re-downloading Ollama:
|
Or by re-downloading Ollama:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installing specific versions
|
## Installing specific versions
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ func TestAPIToolCalling(t *testing.T) {
|
|||||||
t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather")
|
t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather")
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := lastToolCall.Function.Arguments["location"]; !ok {
|
if _, ok := lastToolCall.Function.Arguments.Get("location"); !ok {
|
||||||
t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String())
|
t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String())
|
||||||
}
|
}
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ import (
|
|||||||
"github.com/ollama/ollama/logutil"
|
"github.com/ollama/ollama/logutil"
|
||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type filteredEnv []string
|
type filteredEnv []string
|
||||||
@@ -115,7 +116,7 @@ type llamaServer struct {
|
|||||||
type ollamaServer struct {
|
type ollamaServer struct {
|
||||||
llmServer
|
llmServer
|
||||||
|
|
||||||
textProcessor model.TextProcessor // textProcessor handles text encoding/decoding
|
tokenizer tokenizers.Tokenizer // textProcessor handles text encoding/decoding
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadModel will load a model from disk. The model must be in the GGML format.
|
// LoadModel will load a model from disk. The model must be in the GGML format.
|
||||||
@@ -141,11 +142,11 @@ func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
|
|||||||
// NewLlamaServer will run a server for the given GPUs
|
// NewLlamaServer will run a server for the given GPUs
|
||||||
func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
|
func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
|
||||||
var llamaModel *llama.Model
|
var llamaModel *llama.Model
|
||||||
var textProcessor model.TextProcessor
|
var tokenizer tokenizers.Tokenizer
|
||||||
var err error
|
var err error
|
||||||
if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
|
if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
|
||||||
if len(projectors) == 0 {
|
if len(projectors) == 0 {
|
||||||
textProcessor, err = model.NewTextProcessor(modelPath)
|
tokenizer, err = model.NewTextProcessor(modelPath)
|
||||||
} else {
|
} else {
|
||||||
err = errors.New("split vision models aren't supported")
|
err = errors.New("split vision models aren't supported")
|
||||||
}
|
}
|
||||||
@@ -154,7 +155,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
|||||||
slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
|
slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if textProcessor == nil {
|
if tokenizer == nil {
|
||||||
llamaModel, err = llama.LoadModelFromFile(modelPath, llama.ModelParams{VocabOnly: true})
|
llamaModel, err = llama.LoadModelFromFile(modelPath, llama.ModelParams{VocabOnly: true})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -210,7 +211,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
|||||||
|
|
||||||
kvct := strings.ToLower(envconfig.KvCacheType())
|
kvct := strings.ToLower(envconfig.KvCacheType())
|
||||||
|
|
||||||
if textProcessor == nil {
|
if tokenizer == nil {
|
||||||
flashAttention := ml.FlashAttentionAuto
|
flashAttention := ml.FlashAttentionAuto
|
||||||
if faUserSet {
|
if faUserSet {
|
||||||
if fa {
|
if fa {
|
||||||
@@ -260,7 +261,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
|||||||
gpuLibs := ml.LibraryPaths(gpus)
|
gpuLibs := ml.LibraryPaths(gpus)
|
||||||
status := NewStatusWriter(os.Stderr)
|
status := NewStatusWriter(os.Stderr)
|
||||||
cmd, port, err := StartRunner(
|
cmd, port, err := StartRunner(
|
||||||
textProcessor != nil,
|
tokenizer != nil,
|
||||||
modelPath,
|
modelPath,
|
||||||
gpuLibs,
|
gpuLibs,
|
||||||
status,
|
status,
|
||||||
@@ -309,8 +310,8 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if textProcessor != nil {
|
if tokenizer != nil {
|
||||||
return &ollamaServer{llmServer: s, textProcessor: textProcessor}, nil
|
return &ollamaServer{llmServer: s, tokenizer: tokenizer}, nil
|
||||||
} else {
|
} else {
|
||||||
return &llamaServer{llmServer: s, ggml: f}, nil
|
return &llamaServer{llmServer: s, ggml: f}, nil
|
||||||
}
|
}
|
||||||
@@ -1464,6 +1465,12 @@ type CompletionRequest struct {
|
|||||||
|
|
||||||
// TopLogprobs specifies the number of most likely alternative tokens to return (0-20)
|
// TopLogprobs specifies the number of most likely alternative tokens to return (0-20)
|
||||||
TopLogprobs int
|
TopLogprobs int
|
||||||
|
|
||||||
|
// Image generation fields
|
||||||
|
Width int32 `json:"width,omitempty"`
|
||||||
|
Height int32 `json:"height,omitempty"`
|
||||||
|
Steps int32 `json:"steps,omitempty"`
|
||||||
|
Seed int64 `json:"seed,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoneReason represents the reason why a completion response is done
|
// DoneReason represents the reason why a completion response is done
|
||||||
@@ -1512,6 +1519,15 @@ type CompletionResponse struct {
|
|||||||
|
|
||||||
// Logprobs contains log probability information if requested
|
// Logprobs contains log probability information if requested
|
||||||
Logprobs []Logprob `json:"logprobs,omitempty"`
|
Logprobs []Logprob `json:"logprobs,omitempty"`
|
||||||
|
|
||||||
|
// Image contains base64-encoded image data for image generation
|
||||||
|
Image string `json:"image,omitempty"`
|
||||||
|
|
||||||
|
// Step is the current step in image generation
|
||||||
|
Step int `json:"step,omitempty"`
|
||||||
|
|
||||||
|
// TotalSteps is the total number of steps for image generation
|
||||||
|
TotalSteps int `json:"total_steps,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
||||||
@@ -1757,7 +1773,7 @@ func (s *llamaServer) Tokenize(ctx context.Context, content string) ([]int, erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *ollamaServer) Tokenize(ctx context.Context, content string) ([]int, error) {
|
func (s *ollamaServer) Tokenize(ctx context.Context, content string) ([]int, error) {
|
||||||
tokens, err := s.textProcessor.Encode(content, false)
|
tokens, err := s.tokenizer.Encode(content, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -1792,7 +1808,7 @@ func (s *ollamaServer) Detokenize(ctx context.Context, tokens []int) (string, er
|
|||||||
toks[i] = int32(t)
|
toks[i] = int32(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
content, err := s.textProcessor.Decode(toks)
|
content, err := s.tokenizer.Decode(toks)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
|
||||||
@@ -441,6 +442,7 @@ type ResponsesWriter struct {
|
|||||||
stream bool
|
stream bool
|
||||||
responseID string
|
responseID string
|
||||||
itemID string
|
itemID string
|
||||||
|
request openai.ResponsesRequest
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *ResponsesWriter) writeEvent(eventType string, data any) error {
|
func (w *ResponsesWriter) writeEvent(eventType string, data any) error {
|
||||||
@@ -478,7 +480,9 @@ func (w *ResponsesWriter) writeResponse(data []byte) (int, error) {
|
|||||||
|
|
||||||
// Non-streaming response
|
// Non-streaming response
|
||||||
w.ResponseWriter.Header().Set("Content-Type", "application/json")
|
w.ResponseWriter.Header().Set("Content-Type", "application/json")
|
||||||
response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse)
|
response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse, w.request)
|
||||||
|
completedAt := time.Now().Unix()
|
||||||
|
response.CompletedAt = &completedAt
|
||||||
return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
|
return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -523,11 +527,12 @@ func ResponsesMiddleware() gin.HandlerFunc {
|
|||||||
|
|
||||||
w := &ResponsesWriter{
|
w := &ResponsesWriter{
|
||||||
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
|
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
|
||||||
converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model),
|
converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model, req),
|
||||||
model: req.Model,
|
model: req.Model,
|
||||||
stream: streamRequested,
|
stream: streamRequested,
|
||||||
responseID: responseID,
|
responseID: responseID,
|
||||||
itemID: itemID,
|
itemID: itemID,
|
||||||
|
request: req,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set headers based on streaming mode
|
// Set headers based on streaming mode
|
||||||
@@ -541,3 +546,66 @@ func ResponsesMiddleware() gin.HandlerFunc {
|
|||||||
c.Next()
|
c.Next()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ImageWriter struct {
|
||||||
|
BaseWriter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *ImageWriter) writeResponse(data []byte) (int, error) {
|
||||||
|
var generateResponse api.GenerateResponse
|
||||||
|
if err := json.Unmarshal(data, &generateResponse); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only write response when done with image
|
||||||
|
if generateResponse.Done && generateResponse.Image != "" {
|
||||||
|
w.ResponseWriter.Header().Set("Content-Type", "application/json")
|
||||||
|
return len(data), json.NewEncoder(w.ResponseWriter).Encode(openai.ToImageGenerationResponse(generateResponse))
|
||||||
|
}
|
||||||
|
|
||||||
|
return len(data), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *ImageWriter) Write(data []byte) (int, error) {
|
||||||
|
code := w.ResponseWriter.Status()
|
||||||
|
if code != http.StatusOK {
|
||||||
|
return w.writeError(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
return w.writeResponse(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ImageGenerationsMiddleware() gin.HandlerFunc {
|
||||||
|
return func(c *gin.Context) {
|
||||||
|
var req openai.ImageGenerationRequest
|
||||||
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if req.Prompt == "" {
|
||||||
|
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "prompt is required"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if req.Model == "" {
|
||||||
|
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var b bytes.Buffer
|
||||||
|
if err := json.NewEncoder(&b).Encode(openai.FromImageGenerationRequest(req)); err != nil {
|
||||||
|
c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Request.Body = io.NopCloser(&b)
|
||||||
|
|
||||||
|
w := &ImageWriter{
|
||||||
|
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Writer = w
|
||||||
|
c.Next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -961,3 +961,154 @@ func TestRetrieveMiddleware(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestImageGenerationsMiddleware(t *testing.T) {
|
||||||
|
type testCase struct {
|
||||||
|
name string
|
||||||
|
body string
|
||||||
|
req api.GenerateRequest
|
||||||
|
err openai.ErrorResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
var capturedRequest *api.GenerateRequest
|
||||||
|
|
||||||
|
testCases := []testCase{
|
||||||
|
{
|
||||||
|
name: "image generation basic",
|
||||||
|
body: `{
|
||||||
|
"model": "test-model",
|
||||||
|
"prompt": "a beautiful sunset"
|
||||||
|
}`,
|
||||||
|
req: api.GenerateRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Prompt: "a beautiful sunset",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "image generation with size",
|
||||||
|
body: `{
|
||||||
|
"model": "test-model",
|
||||||
|
"prompt": "a beautiful sunset",
|
||||||
|
"size": "512x768"
|
||||||
|
}`,
|
||||||
|
req: api.GenerateRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Prompt: "a beautiful sunset",
|
||||||
|
Width: 512,
|
||||||
|
Height: 768,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "image generation missing prompt",
|
||||||
|
body: `{
|
||||||
|
"model": "test-model"
|
||||||
|
}`,
|
||||||
|
err: openai.ErrorResponse{
|
||||||
|
Error: openai.Error{
|
||||||
|
Message: "prompt is required",
|
||||||
|
Type: "invalid_request_error",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "image generation missing model",
|
||||||
|
body: `{
|
||||||
|
"prompt": "a beautiful sunset"
|
||||||
|
}`,
|
||||||
|
err: openai.ErrorResponse{
|
||||||
|
Error: openai.Error{
|
||||||
|
Message: "model is required",
|
||||||
|
Type: "invalid_request_error",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := func(c *gin.Context) {
|
||||||
|
c.Status(http.StatusOK)
|
||||||
|
}
|
||||||
|
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
router := gin.New()
|
||||||
|
router.Use(ImageGenerationsMiddleware(), captureRequestMiddleware(&capturedRequest))
|
||||||
|
router.Handle(http.MethodPost, "/api/generate", endpoint)
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
defer func() { capturedRequest = nil }()
|
||||||
|
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
router.ServeHTTP(resp, req)
|
||||||
|
|
||||||
|
if tc.err.Error.Message != "" {
|
||||||
|
var errResp openai.ErrorResponse
|
||||||
|
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if diff := cmp.Diff(tc.err, errResp); diff != "" {
|
||||||
|
t.Fatalf("errors did not match:\n%s", diff)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
|
||||||
|
t.Fatalf("requests did not match:\n%s", diff)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestImageWriterResponse(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
|
// Test that ImageWriter transforms GenerateResponse to OpenAI format
|
||||||
|
endpoint := func(c *gin.Context) {
|
||||||
|
resp := api.GenerateResponse{
|
||||||
|
Model: "test-model",
|
||||||
|
CreatedAt: time.Unix(1234567890, 0).UTC(),
|
||||||
|
Done: true,
|
||||||
|
Image: "dGVzdC1pbWFnZS1kYXRh", // base64 of "test-image-data"
|
||||||
|
}
|
||||||
|
data, _ := json.Marshal(resp)
|
||||||
|
c.Writer.Write(append(data, '\n'))
|
||||||
|
}
|
||||||
|
|
||||||
|
router := gin.New()
|
||||||
|
router.Use(ImageGenerationsMiddleware())
|
||||||
|
router.Handle(http.MethodPost, "/api/generate", endpoint)
|
||||||
|
|
||||||
|
body := `{"model": "test-model", "prompt": "test"}`
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(body))
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
router.ServeHTTP(resp, req)
|
||||||
|
|
||||||
|
if resp.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
var imageResp openai.ImageGenerationResponse
|
||||||
|
if err := json.Unmarshal(resp.Body.Bytes(), &imageResp); err != nil {
|
||||||
|
t.Fatalf("failed to unmarshal response: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if imageResp.Created != 1234567890 {
|
||||||
|
t.Errorf("expected created 1234567890, got %d", imageResp.Created)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(imageResp.Data) != 1 {
|
||||||
|
t.Fatalf("expected 1 image, got %d", len(imageResp.Data))
|
||||||
|
}
|
||||||
|
|
||||||
|
if imageResp.Data[0].B64JSON != "dGVzdC1pbWFnZS1kYXRh" {
|
||||||
|
t.Errorf("expected image data 'dGVzdC1pbWFnZS1kYXRh', got %s", imageResp.Data[0].B64JSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
409
model/ignore_test.go
Normal file
@@ -23,6 +23,7 @@ import (
|
|||||||
_ "github.com/ollama/ollama/ml/backend"
|
_ "github.com/ollama/ollama/ml/backend"
|
||||||
"github.com/ollama/ollama/ml/nn/pooling"
|
"github.com/ollama/ollama/ml/nn/pooling"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -119,7 +120,7 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
|
|||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewTextProcessor(s string) (TextProcessor, error) {
|
func NewTextProcessor(s string) (tokenizers.Tokenizer, error) {
|
||||||
r, err := os.Open(s)
|
r, err := os.Open(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -136,7 +137,7 @@ func NewTextProcessor(s string) (TextProcessor, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tp, ok := m.(TextProcessor)
|
tp, ok := m.(tokenizers.Tokenizer)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, ErrUnsupportedTokenizer
|
return nil, ErrUnsupportedTokenizer
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/pooling"
|
"github.com/ollama/ollama/ml/nn/pooling"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
TypeEmbedding *nn.Embedding `gguf:"token_types"`
|
TypeEmbedding *nn.Embedding `gguf:"token_types"`
|
||||||
@@ -129,7 +130,7 @@ func (o Options) headDim() int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
vocab := &model.Vocabulary{
|
vocab := &tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
@@ -153,16 +154,16 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var processor model.TextProcessor
|
var tokenizer tokenizers.Tokenizer
|
||||||
switch c.String("tokenizer.ggml.model", "bert") {
|
switch c.String("tokenizer.ggml.model", "bert") {
|
||||||
case "bert":
|
case "bert":
|
||||||
processor = model.NewWordPiece(vocab, true)
|
tokenizer = tokenizers.NewWordPiece(vocab, true)
|
||||||
default:
|
default:
|
||||||
return nil, model.ErrUnsupportedTokenizer
|
return nil, model.ErrUnsupportedTokenizer
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Model{
|
return &Model{
|
||||||
TextProcessor: processor,
|
Tokenizer: tokenizer,
|
||||||
Layers: make([]EncoderLayer, c.Uint("block_count")),
|
Layers: make([]EncoderLayer, c.Uint("block_count")),
|
||||||
Options: Options{
|
Options: Options{
|
||||||
hiddenSize: int(c.Uint("embedding_length")),
|
hiddenSize: int(c.Uint("embedding_length")),
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@@ -222,7 +223,7 @@ func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tens
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
Layers []Layer `gguf:"blk"`
|
Layers []Layer `gguf:"blk"`
|
||||||
@@ -277,8 +278,8 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn"
|
"github.com/ollama/ollama/ml/nn"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
Sam *samModel `gguf:"s"`
|
Sam *samModel `gguf:"s"`
|
||||||
Vision *visionModel `gguf:"v"`
|
Vision *visionModel `gguf:"v"`
|
||||||
@@ -134,8 +135,8 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@@ -27,7 +28,7 @@ func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.SentencePiece
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
Layers []Layer `gguf:"blk"`
|
Layers []Layer `gguf:"blk"`
|
||||||
@@ -43,8 +44,8 @@ const (
|
|||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
SentencePiece: model.NewSentencePiece(
|
Tokenizer: tokenizers.NewSentencePiece(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/pooling"
|
"github.com/ollama/ollama/ml/nn/pooling"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type embedModel struct {
|
type embedModel struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.SentencePiece
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*TextModel
|
*TextModel
|
||||||
poolingType pooling.Type
|
poolingType pooling.Type
|
||||||
@@ -31,8 +32,8 @@ func (m *embedModel) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, erro
|
|||||||
|
|
||||||
func newEmbedModel(c fs.Config) (model.Model, error) {
|
func newEmbedModel(c fs.Config) (model.Model, error) {
|
||||||
m := &embedModel{
|
m := &embedModel{
|
||||||
SentencePiece: model.NewSentencePiece(
|
Tokenizer: tokenizers.NewSentencePiece(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
|
|||||||
@@ -12,11 +12,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn"
|
"github.com/ollama/ollama/ml/nn"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
*TextModel
|
*TextModel
|
||||||
@@ -54,7 +55,7 @@ func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
vocabulary := model.Vocabulary{
|
vocabulary := tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
@@ -70,19 +71,19 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
var processor model.TextProcessor
|
var tokenizer tokenizers.Tokenizer
|
||||||
switch c.String("tokenizer.ggml.model") {
|
switch c.String("tokenizer.ggml.model") {
|
||||||
case "gpt2":
|
case "gpt2":
|
||||||
processor = model.NewBytePairEncoding(&vocabulary)
|
tokenizer = tokenizers.NewBytePairEncoding(&vocabulary)
|
||||||
default:
|
default:
|
||||||
// Previous uploads of Gemma 3 on Ollama did not have token 106
|
// Previous uploads of Gemma 3 on Ollama did not have token 106
|
||||||
// (i.e. "<end_of_turn>") so we need to add in case it's not already present
|
// (i.e. "<end_of_turn>") so we need to add in case it's not already present
|
||||||
vocabulary.EOS = append(vocabulary.EOS, int32(c.Uint("tokenizer.ggml.eot_token_id", 106)))
|
vocabulary.EOS = append(vocabulary.EOS, int32(c.Uint("tokenizer.ggml.eot_token_id", 106)))
|
||||||
processor = model.NewSentencePiece(&vocabulary)
|
tokenizer = tokenizers.NewSentencePiece(&vocabulary)
|
||||||
}
|
}
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: processor,
|
Tokenizer: tokenizer,
|
||||||
ImageProcessor: newImageProcessor(c),
|
ImageProcessor: newImageProcessor(c),
|
||||||
VisionModel: newVisionModel(c),
|
VisionModel: newVisionModel(c),
|
||||||
TextModel: newTextModel(c),
|
TextModel: newTextModel(c),
|
||||||
|
|||||||
@@ -6,11 +6,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.SentencePiece
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*TextModel
|
*TextModel
|
||||||
}
|
}
|
||||||
@@ -23,8 +24,8 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
|||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
TextModel: newTextModel(c),
|
TextModel: newTextModel(c),
|
||||||
SentencePiece: model.NewSentencePiece(
|
Tokenizer: tokenizers.NewSentencePiece(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
|
|||||||
@@ -12,11 +12,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Transformer struct {
|
type Transformer struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
TransformerBlocks []TransformerBlock `gguf:"blk"`
|
TransformerBlocks []TransformerBlock `gguf:"blk"`
|
||||||
@@ -196,8 +197,8 @@ func (mlp *MLPBlock) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Optio
|
|||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Transformer{
|
m := Transformer{
|
||||||
TransformerBlocks: make([]TransformerBlock, c.Uint("block_count")),
|
TransformerBlocks: make([]TransformerBlock, c.Uint("block_count")),
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@@ -25,7 +26,7 @@ func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
Layers []Layer `gguf:"blk"`
|
Layers []Layer `gguf:"blk"`
|
||||||
@@ -41,8 +42,8 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
return nil, model.ErrUnsupportedModel
|
return nil, model.ErrUnsupportedModel
|
||||||
}
|
}
|
||||||
|
|
||||||
var processor model.TextProcessor
|
var processor tokenizers.Tokenizer
|
||||||
vocabulary := model.Vocabulary{
|
vocabulary := tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
@@ -80,15 +81,15 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
processor = model.NewBytePairEncoding(&vocabulary, pretokenizers...)
|
processor = tokenizers.NewBytePairEncoding(&vocabulary, pretokenizers...)
|
||||||
case "llama":
|
case "llama":
|
||||||
processor = model.NewSentencePiece(&vocabulary)
|
processor = tokenizers.NewSentencePiece(&vocabulary)
|
||||||
default:
|
default:
|
||||||
return nil, model.ErrUnsupportedTokenizer
|
return nil, model.ErrUnsupportedTokenizer
|
||||||
}
|
}
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: processor,
|
Tokenizer: processor,
|
||||||
Layers: make([]Layer, c.Uint("block_count")),
|
Layers: make([]Layer, c.Uint("block_count")),
|
||||||
Options: Options{
|
Options: Options{
|
||||||
hiddenSize: int(c.Uint("embedding_length")),
|
hiddenSize: int(c.Uint("embedding_length")),
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn"
|
"github.com/ollama/ollama/ml/nn"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
ImageProcessor
|
ImageProcessor
|
||||||
|
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
@@ -33,8 +34,8 @@ func (p *Projector) Forward(ctx ml.Context, visionOutputs ml.Tensor) ml.Tensor {
|
|||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn"
|
"github.com/ollama/ollama/ml/nn"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*TextModel
|
*TextModel
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
@@ -28,12 +29,12 @@ type Model struct {
|
|||||||
var _ model.MultimodalProcessor = (*Model)(nil)
|
var _ model.MultimodalProcessor = (*Model)(nil)
|
||||||
|
|
||||||
// Implement TextProcessor interface
|
// Implement TextProcessor interface
|
||||||
var _ model.TextProcessor = (*Model)(nil)
|
var _ tokenizers.Tokenizer = (*Model)(nil)
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := &Model{
|
m := &Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn"
|
"github.com/ollama/ollama/ml/nn"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
*TextModel
|
*TextModel
|
||||||
@@ -32,8 +33,8 @@ const (
|
|||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -11,11 +11,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
TypeEmbedding *nn.Embedding `gguf:"token_types"`
|
TypeEmbedding *nn.Embedding `gguf:"token_types"`
|
||||||
@@ -178,8 +179,8 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
numHeads := int(c.Uint("attention.head_count"))
|
numHeads := int(c.Uint("attention.head_count"))
|
||||||
headDim := hiddenSize / numHeads
|
headDim := hiddenSize / numHeads
|
||||||
|
|
||||||
processor := model.NewWordPiece(
|
tokenizer := tokenizers.NewWordPiece(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
@@ -219,7 +220,7 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return &Model{
|
return &Model{
|
||||||
TextProcessor: processor,
|
Tokenizer: tokenizer,
|
||||||
Layers: layers,
|
Layers: layers,
|
||||||
Options: Options{
|
Options: Options{
|
||||||
hiddenSize: hiddenSize,
|
hiddenSize: hiddenSize,
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -33,7 +34,7 @@ type Options struct {
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
Layers []Layer `gguf:"blk"`
|
Layers []Layer `gguf:"blk"`
|
||||||
@@ -44,7 +45,7 @@ type Model struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
vocabulary := model.Vocabulary{
|
vocabulary := tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Scores: c.Floats("tokenizer.ggml.scores"),
|
Scores: c.Floats("tokenizer.ggml.scores"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
@@ -58,13 +59,13 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
processor := model.NewBytePairEncoding(
|
tokenizer := tokenizers.NewBytePairEncoding(
|
||||||
&vocabulary,
|
&vocabulary,
|
||||||
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||||
)
|
)
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: processor,
|
Tokenizer: tokenizer,
|
||||||
Layers: make([]Layer, c.Uint("block_count")),
|
Layers: make([]Layer, c.Uint("block_count")),
|
||||||
Options: Options{
|
Options: Options{
|
||||||
hiddenSize: int(c.Uint("embedding_length")),
|
hiddenSize: int(c.Uint("embedding_length")),
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@@ -92,7 +93,7 @@ func (d DecoderLayer) Forward(ctx ml.Context, hiddenStates, positions, outputs m
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
Layers []DecoderLayer `gguf:"blk"`
|
Layers []DecoderLayer `gguf:"blk"`
|
||||||
@@ -139,8 +140,8 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
}
|
}
|
||||||
m := Model{
|
m := Model{
|
||||||
Layers: make([]DecoderLayer, c.Uint("block_count")),
|
Layers: make([]DecoderLayer, c.Uint("block_count")),
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*TextModel
|
*TextModel
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
@@ -27,8 +28,8 @@ var _ model.MultimodalProcessor = (*Model)(nil)
|
|||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := &Model{
|
m := &Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/pooling"
|
"github.com/ollama/ollama/ml/nn/pooling"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type embedModel struct {
|
type embedModel struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*Model
|
*Model
|
||||||
poolingType pooling.Type
|
poolingType pooling.Type
|
||||||
@@ -34,8 +35,8 @@ func newEmbed(c fs.Config) (model.Model, error) {
|
|||||||
layers[i].MLP = &dense{}
|
layers[i].MLP = &dense{}
|
||||||
}
|
}
|
||||||
m := embedModel{
|
m := embedModel{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"github.com/ollama/ollama/ml/nn/rope"
|
"github.com/ollama/ollama/ml/nn/rope"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Options struct {
|
type Options struct {
|
||||||
@@ -159,7 +160,7 @@ func (d *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tens
|
|||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.BytePairEncoding
|
tokenizers.BytePairEncoding
|
||||||
|
|
||||||
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
|
||||||
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
|
||||||
@@ -218,8 +219,8 @@ func New(c fs.Config) (model.Model, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
m := Model{
|
m := Model{
|
||||||
BytePairEncoding: model.NewBytePairEncoding(
|
BytePairEncoding: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -10,11 +10,12 @@ import (
|
|||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/model"
|
||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
model.Base
|
model.Base
|
||||||
model.TextProcessor
|
tokenizers.Tokenizer
|
||||||
|
|
||||||
*TextModel
|
*TextModel
|
||||||
*VisionModel `gguf:"v"`
|
*VisionModel `gguf:"v"`
|
||||||
@@ -172,8 +173,8 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
|
|||||||
|
|
||||||
func New(c fs.Config) (model.Model, error) {
|
func New(c fs.Config) (model.Model, error) {
|
||||||
m := Model{
|
m := Model{
|
||||||
TextProcessor: model.NewBytePairEncoding(
|
Tokenizer: tokenizers.NewBytePairEncoding(
|
||||||
&model.Vocabulary{
|
&tokenizers.Vocabulary{
|
||||||
Values: c.Strings("tokenizer.ggml.tokens"),
|
Values: c.Strings("tokenizer.ggml.tokens"),
|
||||||
Types: c.Ints("tokenizer.ggml.token_type"),
|
Types: c.Ints("tokenizer.ggml.token_type"),
|
||||||
Merges: c.Strings("tokenizer.ggml.merges"),
|
Merges: c.Strings("tokenizer.ggml.merges"),
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package parsers
|
package parsers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
@@ -14,243 +13,114 @@ const (
|
|||||||
Nemotron3NanoCollectingThinking Nemotron3NanoParserState = iota
|
Nemotron3NanoCollectingThinking Nemotron3NanoParserState = iota
|
||||||
Nemotron3NanoSkipWhitespaceAfterThinking
|
Nemotron3NanoSkipWhitespaceAfterThinking
|
||||||
Nemotron3NanoCollectingContent
|
Nemotron3NanoCollectingContent
|
||||||
Nemotron3NanoCollectingToolCalls
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
nemotronThinkClose = "</think>"
|
nemotronThinkClose = "</think>"
|
||||||
nemotronToolCallOpen = "<tool_call>"
|
nemotronToolCallOpen = "<tool_call>"
|
||||||
nemotronToolCallClose = "</tool_call>"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Nemotron3NanoParser struct {
|
type Nemotron3NanoParser struct {
|
||||||
state Nemotron3NanoParserState
|
state Nemotron3NanoParserState
|
||||||
buffer strings.Builder
|
buffer strings.Builder
|
||||||
tools []api.Tool
|
toolParser *Qwen3CoderParser
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) HasToolSupport() bool { return true }
|
func (p *Nemotron3NanoParser) HasToolSupport() bool { return true }
|
||||||
func (p *Nemotron3NanoParser) HasThinkingSupport() bool { return true }
|
func (p *Nemotron3NanoParser) HasThinkingSupport() bool { return true }
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
func (p *Nemotron3NanoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||||
p.tools = tools
|
p.toolParser = &Qwen3CoderParser{}
|
||||||
|
p.toolParser.Init(tools, nil, nil)
|
||||||
|
|
||||||
// thinking is enabled if user requests it
|
|
||||||
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
|
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
|
||||||
|
|
||||||
prefill := lastMessage != nil && lastMessage.Role == "assistant"
|
prefill := lastMessage != nil && lastMessage.Role == "assistant"
|
||||||
|
|
||||||
if !thinkingEnabled {
|
if !thinkingEnabled || (prefill && lastMessage.Content != "") {
|
||||||
p.state = Nemotron3NanoCollectingContent
|
p.state = Nemotron3NanoCollectingContent
|
||||||
return tools
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
if prefill && lastMessage.Content != "" {
|
|
||||||
p.state = Nemotron3NanoCollectingContent
|
|
||||||
return tools
|
|
||||||
}
|
|
||||||
|
|
||||||
p.state = Nemotron3NanoCollectingThinking
|
p.state = Nemotron3NanoCollectingThinking
|
||||||
|
}
|
||||||
|
|
||||||
return tools
|
return tools
|
||||||
}
|
}
|
||||||
|
|
||||||
type nemotronEvent interface {
|
|
||||||
isNemotronEvent()
|
|
||||||
}
|
|
||||||
|
|
||||||
type nemotronEventThinkingContent struct {
|
|
||||||
content string
|
|
||||||
}
|
|
||||||
|
|
||||||
type nemotronEventContent struct {
|
|
||||||
content string
|
|
||||||
}
|
|
||||||
|
|
||||||
type nemotronEventToolCall struct {
|
|
||||||
toolCall api.ToolCall
|
|
||||||
}
|
|
||||||
|
|
||||||
func (nemotronEventThinkingContent) isNemotronEvent() {}
|
|
||||||
func (nemotronEventContent) isNemotronEvent() {}
|
|
||||||
func (nemotronEventToolCall) isNemotronEvent() {}
|
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
func (p *Nemotron3NanoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
||||||
|
if p.state == Nemotron3NanoCollectingContent {
|
||||||
|
return p.toolParser.Add(s, done)
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.state == Nemotron3NanoSkipWhitespaceAfterThinking {
|
||||||
|
s = strings.TrimLeftFunc(s, unicode.IsSpace)
|
||||||
|
if s == "" {
|
||||||
|
return "", "", nil, nil
|
||||||
|
}
|
||||||
|
p.state = Nemotron3NanoCollectingContent
|
||||||
|
return p.toolParser.Add(s, done)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nemotron3NanoCollectingThinking - buffer and look for end markers
|
||||||
p.buffer.WriteString(s)
|
p.buffer.WriteString(s)
|
||||||
events := p.parseEvents()
|
|
||||||
|
|
||||||
var toolCalls []api.ToolCall
|
|
||||||
var contentSb strings.Builder
|
|
||||||
var thinkingSb strings.Builder
|
|
||||||
for _, event := range events {
|
|
||||||
switch event := event.(type) {
|
|
||||||
case nemotronEventToolCall:
|
|
||||||
toolCalls = append(toolCalls, event.toolCall)
|
|
||||||
case nemotronEventThinkingContent:
|
|
||||||
thinkingSb.WriteString(event.content)
|
|
||||||
case nemotronEventContent:
|
|
||||||
contentSb.WriteString(event.content)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return contentSb.String(), thinkingSb.String(), toolCalls, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) parseEvents() []nemotronEvent {
|
|
||||||
var all []nemotronEvent
|
|
||||||
|
|
||||||
keepLooping := true
|
|
||||||
for keepLooping {
|
|
||||||
var events []nemotronEvent
|
|
||||||
events, keepLooping = p.eat()
|
|
||||||
if len(events) > 0 {
|
|
||||||
all = append(all, events...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return all
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitWithPartialCheck extracts unambiguous content before a potential partial tag
|
|
||||||
func (p *Nemotron3NanoParser) emitWithPartialCheck(bufStr, tag string) (unambiguous, ambiguous string) {
|
|
||||||
if overlapLen := overlap(bufStr, tag); overlapLen > 0 {
|
|
||||||
beforePartialTag := bufStr[:len(bufStr)-overlapLen]
|
|
||||||
trailingLen := trailingWhitespaceLen(beforePartialTag)
|
|
||||||
return bufStr[:len(beforePartialTag)-trailingLen], bufStr[len(beforePartialTag)-trailingLen:]
|
|
||||||
}
|
|
||||||
wsLen := trailingWhitespaceLen(bufStr)
|
|
||||||
return bufStr[:len(bufStr)-wsLen], bufStr[len(bufStr)-wsLen:]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) eat() ([]nemotronEvent, bool) {
|
|
||||||
bufStr := p.buffer.String()
|
bufStr := p.buffer.String()
|
||||||
if bufStr == "" {
|
|
||||||
return nil, false
|
// Look for end of thinking: </think> or <tool_call> (model may skip </think>)
|
||||||
|
thinkIdx := strings.Index(bufStr, nemotronThinkClose)
|
||||||
|
toolIdx := strings.Index(bufStr, nemotronToolCallOpen)
|
||||||
|
|
||||||
|
var endIdx int = -1
|
||||||
|
var remainder string
|
||||||
|
|
||||||
|
if thinkIdx != -1 && (toolIdx == -1 || thinkIdx < toolIdx) {
|
||||||
|
endIdx = thinkIdx
|
||||||
|
remainder = strings.TrimLeftFunc(bufStr[thinkIdx+len(nemotronThinkClose):], unicode.IsSpace)
|
||||||
|
} else if toolIdx != -1 {
|
||||||
|
endIdx = toolIdx
|
||||||
|
remainder = bufStr[toolIdx:] // Include <tool_call> tag
|
||||||
}
|
}
|
||||||
|
|
||||||
switch p.state {
|
if endIdx != -1 {
|
||||||
case Nemotron3NanoCollectingThinking:
|
thinking = strings.TrimRightFunc(bufStr[:endIdx], unicode.IsSpace)
|
||||||
if strings.Contains(bufStr, nemotronThinkClose) {
|
|
||||||
split := strings.SplitN(bufStr, nemotronThinkClose, 2)
|
|
||||||
thinking := strings.TrimRightFunc(split[0], unicode.IsSpace)
|
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
remainder := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
|
||||||
p.buffer.WriteString(remainder)
|
|
||||||
// Transition to whitespace-skipping state if buffer is empty,
|
|
||||||
// otherwise go directly to content collection
|
|
||||||
if remainder == "" {
|
if remainder == "" {
|
||||||
p.state = Nemotron3NanoSkipWhitespaceAfterThinking
|
p.state = Nemotron3NanoSkipWhitespaceAfterThinking
|
||||||
} else {
|
} else {
|
||||||
p.state = Nemotron3NanoCollectingContent
|
p.state = Nemotron3NanoCollectingContent
|
||||||
|
content, _, calls, err = p.toolParser.Add(remainder, done)
|
||||||
}
|
}
|
||||||
if thinking != "" {
|
return content, thinking, calls, err
|
||||||
return []nemotronEvent{nemotronEventThinkingContent{content: thinking}}, true
|
|
||||||
}
|
}
|
||||||
return nil, true
|
|
||||||
|
// No end marker - emit unambiguous thinking
|
||||||
|
thinking = p.emitThinking(bufStr)
|
||||||
|
return "", thinking, nil, nil
|
||||||
}
|
}
|
||||||
unambig, ambig := p.emitWithPartialCheck(bufStr, nemotronThinkClose)
|
|
||||||
|
// emitThinking returns unambiguous thinking content, keeping potential partial tags in buffer
|
||||||
|
func (p *Nemotron3NanoParser) emitThinking(bufStr string) string {
|
||||||
|
// Check for partial </think> or <tool_call> at end
|
||||||
|
thinkOverlap := overlap(bufStr, nemotronThinkClose)
|
||||||
|
toolOverlap := overlap(bufStr, nemotronToolCallOpen)
|
||||||
|
maxOverlap := max(thinkOverlap, toolOverlap)
|
||||||
|
|
||||||
|
if maxOverlap > 0 {
|
||||||
|
unambiguous := bufStr[:len(bufStr)-maxOverlap]
|
||||||
|
unambiguous = strings.TrimRightFunc(unambiguous, unicode.IsSpace)
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.buffer.WriteString(ambig)
|
p.buffer.WriteString(bufStr[len(bufStr)-maxOverlap:])
|
||||||
if unambig != "" {
|
return unambiguous
|
||||||
return []nemotronEvent{nemotronEventThinkingContent{content: unambig}}, false
|
|
||||||
}
|
}
|
||||||
return nil, false
|
|
||||||
|
|
||||||
// We only want to skip whitespace between thinking and content
|
// No partial tags - emit all but trailing whitespace
|
||||||
case Nemotron3NanoSkipWhitespaceAfterThinking:
|
wsLen := trailingWhitespaceLen(bufStr)
|
||||||
bufStr = strings.TrimLeftFunc(bufStr, unicode.IsSpace)
|
if wsLen > 0 {
|
||||||
|
unambiguous := bufStr[:len(bufStr)-wsLen]
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.buffer.WriteString(bufStr)
|
p.buffer.WriteString(bufStr[len(bufStr)-wsLen:])
|
||||||
if bufStr == "" {
|
return unambiguous
|
||||||
return nil, false
|
|
||||||
}
|
}
|
||||||
p.state = Nemotron3NanoCollectingContent
|
|
||||||
return nil, true
|
|
||||||
|
|
||||||
case Nemotron3NanoCollectingContent:
|
// Nothing to hold back
|
||||||
if strings.Contains(bufStr, nemotronToolCallOpen) {
|
|
||||||
split := strings.SplitN(bufStr, nemotronToolCallOpen, 2)
|
|
||||||
content := strings.TrimRightFunc(split[0], unicode.IsSpace)
|
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.buffer.WriteString(split[1])
|
return bufStr
|
||||||
p.state = Nemotron3NanoCollectingToolCalls
|
|
||||||
if content != "" {
|
|
||||||
return []nemotronEvent{nemotronEventContent{content: content}}, true
|
|
||||||
}
|
|
||||||
return nil, true
|
|
||||||
}
|
|
||||||
unambig, ambig := p.emitWithPartialCheck(bufStr, nemotronToolCallOpen)
|
|
||||||
p.buffer.Reset()
|
|
||||||
p.buffer.WriteString(ambig)
|
|
||||||
if unambig != "" {
|
|
||||||
return []nemotronEvent{nemotronEventContent{content: unambig}}, false
|
|
||||||
}
|
|
||||||
return nil, false
|
|
||||||
|
|
||||||
case Nemotron3NanoCollectingToolCalls:
|
|
||||||
if strings.Contains(bufStr, nemotronToolCallClose) {
|
|
||||||
split := strings.SplitN(bufStr, nemotronToolCallClose, 2)
|
|
||||||
remaining := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
|
||||||
p.buffer.Reset()
|
|
||||||
p.buffer.WriteString(remaining)
|
|
||||||
|
|
||||||
var events []nemotronEvent
|
|
||||||
if tc, err := p.parseToolCall(split[0]); err == nil {
|
|
||||||
events = append(events, nemotronEventToolCall{toolCall: tc})
|
|
||||||
}
|
|
||||||
|
|
||||||
if !strings.Contains(remaining, nemotronToolCallOpen) {
|
|
||||||
p.state = Nemotron3NanoCollectingContent
|
|
||||||
}
|
|
||||||
return events, true
|
|
||||||
}
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
nemotronFunctionRegex = regexp.MustCompile(`<function=([^>]+)>`)
|
|
||||||
nemotronParameterRegex = regexp.MustCompile(`<parameter=([^>]+)>\n?([\s\S]*?)\n?</parameter>`)
|
|
||||||
)
|
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) parseToolCall(content string) (api.ToolCall, error) {
|
|
||||||
toolCall := api.ToolCall{}
|
|
||||||
|
|
||||||
// Extract function name
|
|
||||||
fnMatch := nemotronFunctionRegex.FindStringSubmatch(content)
|
|
||||||
if len(fnMatch) < 2 {
|
|
||||||
return toolCall, nil
|
|
||||||
}
|
|
||||||
toolCall.Function.Name = fnMatch[1]
|
|
||||||
|
|
||||||
// Extract parameters
|
|
||||||
toolCall.Function.Arguments = api.NewToolCallFunctionArguments()
|
|
||||||
paramMatches := nemotronParameterRegex.FindAllStringSubmatch(content, -1)
|
|
||||||
for _, match := range paramMatches {
|
|
||||||
if len(match) >= 3 {
|
|
||||||
paramName := match[1]
|
|
||||||
paramValue := strings.TrimSpace(match[2])
|
|
||||||
|
|
||||||
// Try to parse as typed value based on tool definition
|
|
||||||
toolCall.Function.Arguments.Set(paramName, p.parseParamValue(paramName, paramValue))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return toolCall, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Nemotron3NanoParser) parseParamValue(paramName string, raw string) any {
|
|
||||||
// Find the matching tool to get parameter type
|
|
||||||
var paramType api.PropertyType
|
|
||||||
for _, tool := range p.tools {
|
|
||||||
if tool.Function.Parameters.Properties != nil {
|
|
||||||
if prop, ok := tool.Function.Parameters.Properties.Get(paramName); ok {
|
|
||||||
paramType = prop.Type
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return parseValue(raw, paramType)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ import (
|
|||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestNemotron3NanoParser tests Nemotron-specific behavior (thinking support).
|
||||||
|
// Tool call parsing is tested in qwen3coder_test.go since Nemotron delegates to Qwen3CoderParser.
|
||||||
func TestNemotron3NanoParser(t *testing.T) {
|
func TestNemotron3NanoParser(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -17,18 +19,6 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
expectedThinking string
|
expectedThinking string
|
||||||
expectedCalls []api.ToolCall
|
expectedCalls []api.ToolCall
|
||||||
}{
|
}{
|
||||||
{
|
|
||||||
name: "simple content - no thinking",
|
|
||||||
input: "Hello, how can I help you?",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Hello, how can I help you?",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "simple content - thinking disabled",
|
|
||||||
input: "Hello, how can I help you?",
|
|
||||||
thinkValue: &api.ThinkValue{Value: false},
|
|
||||||
expectedContent: "Hello, how can I help you?",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "thinking then content",
|
name: "thinking then content",
|
||||||
input: "Let me think about this...</think>\nHere is my answer.",
|
input: "Let me think about this...</think>\nHere is my answer.",
|
||||||
@@ -43,69 +33,6 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
expectedThinking: "Step 1: Analyze\nStep 2: Process\nStep 3: Conclude",
|
expectedThinking: "Step 1: Analyze\nStep 2: Process\nStep 3: Conclude",
|
||||||
expectedContent: "The answer is 42.",
|
expectedContent: "The answer is 42.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "simple tool call",
|
|
||||||
input: "<tool_call>\n<function=get_weather>\n<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "Paris"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "content then tool call",
|
|
||||||
input: "Let me check the weather.\n<tool_call>\n<function=get_weather>\n<parameter=city>\nNYC\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Let me check the weather.",
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "NYC"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "tool call with multiple parameters",
|
|
||||||
input: "<tool_call>\n<function=book_flight>\n<parameter=from>\nSFO\n</parameter>\n<parameter=to>\nNYC\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "book_flight",
|
|
||||||
Arguments: testArgs(map[string]any{
|
|
||||||
"from": "SFO",
|
|
||||||
"to": "NYC",
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "multiple tool calls",
|
|
||||||
input: "<tool_call>\n<function=get_weather>\n<parameter=city>\nSan Francisco\n</parameter>\n</function>\n</tool_call>\n" +
|
|
||||||
"<tool_call>\n<function=get_weather>\n<parameter=city>\nNew York\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "San Francisco"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "New York"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "thinking then tool call",
|
name: "thinking then tool call",
|
||||||
input: "I should check the weather...</think>\n<tool_call>\n<function=get_weather>\n<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>",
|
input: "I should check the weather...</think>\n<tool_call>\n<function=get_weather>\n<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>",
|
||||||
@@ -135,19 +62,6 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "tool call with multiline parameter value",
|
|
||||||
input: "<tool_call>\n<function=create_note>\n<parameter=content>\nLine 1\nLine 2\nLine 3\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "create_note",
|
|
||||||
Arguments: testArgs(map[string]any{"content": "Line 1\nLine 2\nLine 3"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "empty thinking block - immediate close",
|
name: "empty thinking block - immediate close",
|
||||||
input: "</think>\nHere is my answer.",
|
input: "</think>\nHere is my answer.",
|
||||||
@@ -161,18 +75,6 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
thinkValue: &api.ThinkValue{Value: false},
|
thinkValue: &api.ThinkValue{Value: false},
|
||||||
expectedContent: "</think>\nSome content after spurious tag.",
|
expectedContent: "</think>\nSome content after spurious tag.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "tool call with no function name - returns empty tool call",
|
|
||||||
input: "<tool_call>\n<function=>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "", Arguments: api.NewToolCallFunctionArguments()}}},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "content with newlines preserved",
|
|
||||||
input: "Line 1\n\nLine 2\n\n\nLine 3",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Line 1\n\nLine 2\n\n\nLine 3",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "thinking with only whitespace after close tag",
|
name: "thinking with only whitespace after close tag",
|
||||||
input: "My thoughts...</think> \n\t\n Content here.",
|
input: "My thoughts...</think> \n\t\n Content here.",
|
||||||
@@ -180,25 +82,6 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
expectedThinking: "My thoughts...",
|
expectedThinking: "My thoughts...",
|
||||||
expectedContent: "Content here.",
|
expectedContent: "Content here.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "unicode content",
|
|
||||||
input: "Hello 世界! 🌍 Ñoño",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Hello 世界! 🌍 Ñoño",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "tool call with numeric parameter",
|
|
||||||
input: "<tool_call>\n<function=set_temp>\n<parameter=value>\n42\n</parameter>\n</function>\n</tool_call>",
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "set_temp",
|
|
||||||
Arguments: testArgs(map[string]any{"value": "42"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
@@ -233,6 +116,8 @@ func TestNemotron3NanoParser(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestNemotron3NanoParser_Streaming tests streaming behavior for thinking support.
|
||||||
|
// Tool call streaming is tested in qwen3coder_test.go.
|
||||||
func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -242,18 +127,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
expectedThinking string
|
expectedThinking string
|
||||||
expectedCalls []api.ToolCall
|
expectedCalls []api.ToolCall
|
||||||
}{
|
}{
|
||||||
{
|
|
||||||
name: "streaming content character by character",
|
|
||||||
chunks: []string{"H", "e", "l", "l", "o", ",", " ", "w", "o", "r", "l", "d", "!"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Hello, world!",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "streaming content small tokens",
|
|
||||||
chunks: []string{"Hel", "lo", ", ", "how ", "can", " I", " help", " you", " today", "?"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Hello, how can I help you today?",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "streaming thinking then content - granular",
|
name: "streaming thinking then content - granular",
|
||||||
chunks: []string{"Let", " me", " th", "ink", " about", " this", "...", "<", "/", "think", ">", "\n", "Here", " is", " my", " answer", "."},
|
chunks: []string{"Let", " me", " th", "ink", " about", " this", "...", "<", "/", "think", ">", "\n", "Here", " is", " my", " answer", "."},
|
||||||
@@ -268,45 +141,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
expectedThinking: "Step 1: Analyze\nStep 2: Process",
|
expectedThinking: "Step 1: Analyze\nStep 2: Process",
|
||||||
expectedContent: "The answer.",
|
expectedContent: "The answer.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "streaming tool call - highly granular",
|
|
||||||
chunks: []string{"<", "tool", "_", "call", ">", "\n", "<", "func", "tion", "=", "get", "_", "weather", ">", "\n", "<", "param", "eter", "=", "city", ">", "\n", "Par", "is", "\n", "</", "param", "eter", ">", "\n", "</", "func", "tion", ">", "\n", "</", "tool", "_", "call", ">"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "Paris"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "streaming content then tool call - granular",
|
|
||||||
chunks: []string{"Let", " me", " check", " the", " weather", ".", "\n<", "tool_call", ">", "\n", "<function=", "get_weather", ">", "\n", "<parameter=", "city", ">", "\n", "NYC", "\n", "</parameter>", "\n", "</function>", "\n", "</tool_call>"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Let me check the weather.",
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "NYC"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "tool call tag split character by character",
|
|
||||||
chunks: []string{"<", "t", "o", "o", "l", "_", "c", "a", "l", "l", ">", "\n", "<", "f", "u", "n", "c", "t", "i", "o", "n", "=", "t", "e", "s", "t", ">", "\n", "<", "/", "f", "u", "n", "c", "t", "i", "o", "n", ">", "\n", "<", "/", "t", "o", "o", "l", "_", "c", "a", "l", "l", ">"},
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "test",
|
|
||||||
Arguments: api.NewToolCallFunctionArguments(),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "thinking close tag split character by character",
|
name: "thinking close tag split character by character",
|
||||||
chunks: []string{"I", "'", "m", " ", "t", "h", "i", "n", "k", "i", "n", "g", ".", ".", ".", "<", "/", "t", "h", "i", "n", "k", ">", "\n", "D", "o", "n", "e", "!"},
|
chunks: []string{"I", "'", "m", " ", "t", "h", "i", "n", "k", "i", "n", "g", ".", ".", ".", "<", "/", "t", "h", "i", "n", "k", ">", "\n", "D", "o", "n", "e", "!"},
|
||||||
@@ -321,22 +155,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
expectedThinking: "Thinking...",
|
expectedThinking: "Thinking...",
|
||||||
expectedContent: "Content here.",
|
expectedContent: "Content here.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "tool call with multiple parameters - streaming",
|
|
||||||
chunks: []string{"<tool_", "call>\n", "<function", "=book_", "flight>", "\n<para", "meter=", "from>\n", "SFO\n", "</param", "eter>", "\n<param", "eter=to", ">\nNYC", "\n</para", "meter>", "\n</func", "tion>\n", "</tool_", "call>"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "book_flight",
|
|
||||||
Arguments: testArgs(map[string]any{
|
|
||||||
"from": "SFO",
|
|
||||||
"to": "NYC",
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "thinking then content then tool call - streaming",
|
name: "thinking then content then tool call - streaming",
|
||||||
chunks: []string{"Ana", "lyzing", " your", " request", "...", "</", "think", ">\n", "I'll", " check", " that", " for", " you", ".", "\n", "<tool", "_call", ">\n", "<function", "=search", ">\n", "<parameter", "=query", ">\n", "test", " query", "\n</", "parameter", ">\n", "</function", ">\n", "</tool", "_call", ">"},
|
chunks: []string{"Ana", "lyzing", " your", " request", "...", "</", "think", ">\n", "I'll", " check", " that", " for", " you", ".", "\n", "<tool", "_call", ">\n", "<function", "=search", ">\n", "<parameter", "=query", ">\n", "test", " query", "\n</", "parameter", ">\n", "</function", ">\n", "</tool", "_call", ">"},
|
||||||
@@ -352,45 +170,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "multiple tool calls - streaming",
|
|
||||||
chunks: []string{
|
|
||||||
"<tool_call>", "\n", "<function=", "get_weather>", "\n",
|
|
||||||
"<parameter=", "city>\n", "San Fran", "cisco\n", "</parameter>", "\n",
|
|
||||||
"</function>", "\n", "</tool_call>", "\n",
|
|
||||||
"<tool_", "call>\n", "<function", "=get_weather", ">\n",
|
|
||||||
"<param", "eter=city", ">\nNew", " York\n", "</parameter>\n",
|
|
||||||
"</function>\n", "</tool_call>",
|
|
||||||
},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "San Francisco"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "get_weather",
|
|
||||||
Arguments: testArgs(map[string]any{"city": "New York"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "tool call with multiline parameter - streaming",
|
|
||||||
chunks: []string{"<tool_call>\n", "<function=", "create_note>\n", "<parameter=", "content>\n", "Line 1", "\nLine", " 2\n", "Line 3", "\n</parameter>\n", "</function>\n", "</tool_call>"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "create_note",
|
|
||||||
Arguments: testArgs(map[string]any{"content": "Line 1\nLine 2\nLine 3"}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "empty thinking block",
|
name: "empty thinking block",
|
||||||
chunks: []string{"</think>", "\n", "Just content."},
|
chunks: []string{"</think>", "\n", "Just content."},
|
||||||
@@ -398,12 +177,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
expectedThinking: "",
|
expectedThinking: "",
|
||||||
expectedContent: "Just content.",
|
expectedContent: "Just content.",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "empty input chunks interspersed",
|
|
||||||
chunks: []string{"Hello", "", " ", "", "world", "", "!"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Hello world!",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "tool call immediately after think close - no content",
|
name: "tool call immediately after think close - no content",
|
||||||
chunks: []string{"Analyzing...", "</think>", "\n", "<tool_call>", "\n<function=test>\n</function>\n", "</tool_call>"},
|
chunks: []string{"Analyzing...", "</think>", "\n", "<tool_call>", "\n<function=test>\n</function>\n", "</tool_call>"},
|
||||||
@@ -418,25 +191,6 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "tool call with empty parameter value",
|
|
||||||
chunks: []string{"<tool_call>\n<function=test>\n<parameter=name>\n", "\n</parameter>\n</function>\n</tool_call>"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedCalls: []api.ToolCall{
|
|
||||||
{
|
|
||||||
Function: api.ToolCallFunction{
|
|
||||||
Name: "test",
|
|
||||||
Arguments: testArgs(map[string]any{"name": ""}),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "partial tool call tag at end - buffered",
|
|
||||||
chunks: []string{"Here's some content", "<tool"},
|
|
||||||
thinkValue: nil,
|
|
||||||
expectedContent: "Here's some content",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
@@ -572,3 +326,65 @@ func TestNemotron3NanoParser_WithTools(t *testing.T) {
|
|||||||
t.Errorf("calls mismatch (-got +want):\n%s", diff)
|
t.Errorf("calls mismatch (-got +want):\n%s", diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestNemotron3NanoParser_ToolCallWithoutThinkClose tests the case where thinking is enabled
|
||||||
|
// but the model outputs content + tool call WITHOUT the </think> tag.
|
||||||
|
// The parser should still parse the tool call (content before is treated as thinking).
|
||||||
|
func TestNemotron3NanoParser_ToolCallWithoutThinkClose(t *testing.T) {
|
||||||
|
chunks := []string{
|
||||||
|
"Let", " me", " analyze", " this", ".", "\n",
|
||||||
|
"<tool_call>", "\n",
|
||||||
|
"<function=get_weather>", "\n",
|
||||||
|
"<parameter=city>", "Paris", "</parameter>", "\n",
|
||||||
|
"</function>", "\n",
|
||||||
|
"</tool_call>",
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &Nemotron3NanoParser{}
|
||||||
|
p.Init(nil, nil, &api.ThinkValue{Value: true}) // thinking ENABLED but model doesn't output </think>
|
||||||
|
|
||||||
|
var allContent string
|
||||||
|
var allThinking string
|
||||||
|
var allCalls []api.ToolCall
|
||||||
|
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
content, thinking, calls, err := p.Add(chunk, false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
allContent += content
|
||||||
|
allThinking += thinking
|
||||||
|
allCalls = append(allCalls, calls...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drain
|
||||||
|
content, thinking, calls, err := p.Add("", true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error on done: %v", err)
|
||||||
|
}
|
||||||
|
allContent += content
|
||||||
|
allThinking += thinking
|
||||||
|
allCalls = append(allCalls, calls...)
|
||||||
|
|
||||||
|
// The parser was in thinking mode, so text before <tool_call> is emitted as thinking.
|
||||||
|
expectedThinking := "Let me analyze this."
|
||||||
|
|
||||||
|
expectedCalls := []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "get_weather",
|
||||||
|
Arguments: testArgs(map[string]any{"city": "Paris"}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if allContent != "" {
|
||||||
|
t.Errorf("expected no content (text was streamed as thinking), got: %q", allContent)
|
||||||
|
}
|
||||||
|
if diff := cmp.Diff(allThinking, expectedThinking); diff != "" {
|
||||||
|
t.Errorf("thinking mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
if diff := cmp.Diff(allCalls, expectedCalls, argsComparer); diff != "" {
|
||||||
|
t.Errorf("calls mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -91,6 +91,37 @@ func TestQwenParserStreaming(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
desc: "tool call tags split character by character",
|
||||||
|
steps: []step{
|
||||||
|
{input: "<", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "t", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "o", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "o", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "_", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "c", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "a", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: ">", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "a", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "b", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "c", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "<", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "/", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "t", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "o", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "o", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "_", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "c", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "a", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: "l", wantEvents: []qwenEvent{}},
|
||||||
|
{input: ">", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "abc"}}},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
desc: "trailing whitespace between content and tool call",
|
desc: "trailing whitespace between content and tool call",
|
||||||
steps: []step{
|
steps: []step{
|
||||||
|
|||||||
@@ -630,6 +630,10 @@ func nameFromToolCallID(messages []Message, toolCallID string) string {
|
|||||||
|
|
||||||
// decodeImageURL decodes a base64 data URI into raw image bytes.
|
// decodeImageURL decodes a base64 data URI into raw image bytes.
|
||||||
func decodeImageURL(url string) (api.ImageData, error) {
|
func decodeImageURL(url string) (api.ImageData, error) {
|
||||||
|
if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
|
||||||
|
return nil, errors.New("image URLs are not currently supported, please use base64 encoded data instead")
|
||||||
|
}
|
||||||
|
|
||||||
types := []string{"jpeg", "jpg", "png", "webp"}
|
types := []string{"jpeg", "jpg", "png", "webp"}
|
||||||
|
|
||||||
// Support blank mime type to match /api/chat's behavior of taking just unadorned base64
|
// Support blank mime type to match /api/chat's behavior of taking just unadorned base64
|
||||||
@@ -733,3 +737,60 @@ func FromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
|
|||||||
DebugRenderOnly: r.DebugRenderOnly,
|
DebugRenderOnly: r.DebugRenderOnly,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ImageGenerationRequest is an OpenAI-compatible image generation request.
|
||||||
|
type ImageGenerationRequest struct {
|
||||||
|
Model string `json:"model"`
|
||||||
|
Prompt string `json:"prompt"`
|
||||||
|
N int `json:"n,omitempty"`
|
||||||
|
Size string `json:"size,omitempty"`
|
||||||
|
ResponseFormat string `json:"response_format,omitempty"`
|
||||||
|
Seed *int64 `json:"seed,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImageGenerationResponse is an OpenAI-compatible image generation response.
|
||||||
|
type ImageGenerationResponse struct {
|
||||||
|
Created int64 `json:"created"`
|
||||||
|
Data []ImageURLOrData `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImageURLOrData contains either a URL or base64-encoded image data.
|
||||||
|
type ImageURLOrData struct {
|
||||||
|
URL string `json:"url,omitempty"`
|
||||||
|
B64JSON string `json:"b64_json,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromImageGenerationRequest converts an OpenAI image generation request to an Ollama GenerateRequest.
|
||||||
|
func FromImageGenerationRequest(r ImageGenerationRequest) api.GenerateRequest {
|
||||||
|
req := api.GenerateRequest{
|
||||||
|
Model: r.Model,
|
||||||
|
Prompt: r.Prompt,
|
||||||
|
}
|
||||||
|
// Parse size if provided (e.g., "1024x768")
|
||||||
|
if r.Size != "" {
|
||||||
|
var w, h int32
|
||||||
|
if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
|
||||||
|
req.Width = w
|
||||||
|
req.Height = h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.Seed != nil {
|
||||||
|
if req.Options == nil {
|
||||||
|
req.Options = map[string]any{}
|
||||||
|
}
|
||||||
|
req.Options["seed"] = *r.Seed
|
||||||
|
}
|
||||||
|
return req
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToImageGenerationResponse converts an Ollama GenerateResponse to an OpenAI ImageGenerationResponse.
|
||||||
|
func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationResponse {
|
||||||
|
var data []ImageURLOrData
|
||||||
|
if resp.Image != "" {
|
||||||
|
data = []ImageURLOrData{{B64JSON: resp.Image}}
|
||||||
|
}
|
||||||
|
return ImageGenerationResponse{
|
||||||
|
Created: resp.CreatedAt.Unix(),
|
||||||
|
Data: data,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
)
|
)
|
||||||
@@ -265,9 +266,9 @@ type ResponsesText struct {
|
|||||||
type ResponsesTool struct {
|
type ResponsesTool struct {
|
||||||
Type string `json:"type"` // "function"
|
Type string `json:"type"` // "function"
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Description string `json:"description,omitempty"`
|
Description *string `json:"description"` // nullable but required
|
||||||
Strict bool `json:"strict,omitempty"`
|
Strict *bool `json:"strict"` // nullable but required
|
||||||
Parameters map[string]any `json:"parameters,omitempty"`
|
Parameters map[string]any `json:"parameters"` // nullable but required
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResponsesRequest struct {
|
type ResponsesRequest struct {
|
||||||
@@ -475,11 +476,16 @@ func convertTool(t ResponsesTool) (api.Tool, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var description string
|
||||||
|
if t.Description != nil {
|
||||||
|
description = *t.Description
|
||||||
|
}
|
||||||
|
|
||||||
return api.Tool{
|
return api.Tool{
|
||||||
Type: t.Type,
|
Type: t.Type,
|
||||||
Function: api.ToolFunction{
|
Function: api.ToolFunction{
|
||||||
Name: t.Name,
|
Name: t.Name,
|
||||||
Description: t.Description,
|
Description: description,
|
||||||
Parameters: params,
|
Parameters: params,
|
||||||
},
|
},
|
||||||
}, nil
|
}, nil
|
||||||
@@ -516,17 +522,60 @@ func convertInputMessage(m ResponsesInputMessage) (api.Message, error) {
|
|||||||
|
|
||||||
// Response types for the Responses API
|
// Response types for the Responses API
|
||||||
|
|
||||||
|
// ResponsesTextField represents the text output configuration in the response.
|
||||||
|
type ResponsesTextField struct {
|
||||||
|
Format ResponsesTextFormat `json:"format"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResponsesReasoningOutput represents reasoning configuration in the response.
|
||||||
|
type ResponsesReasoningOutput struct {
|
||||||
|
Effort *string `json:"effort,omitempty"`
|
||||||
|
Summary *string `json:"summary,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResponsesError represents an error in the response.
|
||||||
|
type ResponsesError struct {
|
||||||
|
Code string `json:"code"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResponsesIncompleteDetails represents details about why a response was incomplete.
|
||||||
|
type ResponsesIncompleteDetails struct {
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
}
|
||||||
|
|
||||||
type ResponsesResponse struct {
|
type ResponsesResponse struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
Object string `json:"object"`
|
Object string `json:"object"`
|
||||||
CreatedAt int64 `json:"created_at"`
|
CreatedAt int64 `json:"created_at"`
|
||||||
|
CompletedAt *int64 `json:"completed_at"`
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
|
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
PreviousResponseID *string `json:"previous_response_id"`
|
||||||
|
Instructions *string `json:"instructions"`
|
||||||
Output []ResponsesOutputItem `json:"output"`
|
Output []ResponsesOutputItem `json:"output"`
|
||||||
Usage *ResponsesUsage `json:"usage,omitempty"`
|
Error *ResponsesError `json:"error"`
|
||||||
// TODO(drifkin): add `temperature` and `top_p` to the response, but this
|
Tools []ResponsesTool `json:"tools"`
|
||||||
// requires additional plumbing to find the effective values since the
|
ToolChoice any `json:"tool_choice"`
|
||||||
// defaults can come from the model or the request
|
Truncation string `json:"truncation"`
|
||||||
|
ParallelToolCalls bool `json:"parallel_tool_calls"`
|
||||||
|
Text ResponsesTextField `json:"text"`
|
||||||
|
TopP float64 `json:"top_p"`
|
||||||
|
PresencePenalty float64 `json:"presence_penalty"`
|
||||||
|
FrequencyPenalty float64 `json:"frequency_penalty"`
|
||||||
|
TopLogprobs int `json:"top_logprobs"`
|
||||||
|
Temperature float64 `json:"temperature"`
|
||||||
|
Reasoning *ResponsesReasoningOutput `json:"reasoning"`
|
||||||
|
Usage *ResponsesUsage `json:"usage"`
|
||||||
|
MaxOutputTokens *int `json:"max_output_tokens"`
|
||||||
|
MaxToolCalls *int `json:"max_tool_calls"`
|
||||||
|
Store bool `json:"store"`
|
||||||
|
Background bool `json:"background"`
|
||||||
|
ServiceTier string `json:"service_tier"`
|
||||||
|
Metadata map[string]any `json:"metadata"`
|
||||||
|
SafetyIdentifier *string `json:"safety_identifier"`
|
||||||
|
PromptCacheKey *string `json:"prompt_cache_key"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResponsesOutputItem struct {
|
type ResponsesOutputItem struct {
|
||||||
@@ -552,16 +601,37 @@ type ResponsesReasoningSummary struct {
|
|||||||
type ResponsesOutputContent struct {
|
type ResponsesOutputContent struct {
|
||||||
Type string `json:"type"` // "output_text"
|
Type string `json:"type"` // "output_text"
|
||||||
Text string `json:"text"`
|
Text string `json:"text"`
|
||||||
|
Annotations []any `json:"annotations"`
|
||||||
|
Logprobs []any `json:"logprobs"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ResponsesInputTokensDetails struct {
|
||||||
|
CachedTokens int `json:"cached_tokens"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ResponsesOutputTokensDetails struct {
|
||||||
|
ReasoningTokens int `json:"reasoning_tokens"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResponsesUsage struct {
|
type ResponsesUsage struct {
|
||||||
InputTokens int `json:"input_tokens"`
|
InputTokens int `json:"input_tokens"`
|
||||||
OutputTokens int `json:"output_tokens"`
|
OutputTokens int `json:"output_tokens"`
|
||||||
TotalTokens int `json:"total_tokens"`
|
TotalTokens int `json:"total_tokens"`
|
||||||
|
InputTokensDetails ResponsesInputTokensDetails `json:"input_tokens_details"`
|
||||||
|
OutputTokensDetails ResponsesOutputTokensDetails `json:"output_tokens_details"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToResponse converts an api.ChatResponse to a Responses API response
|
// derefFloat64 returns the value of a float64 pointer, or a default if nil.
|
||||||
func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) ResponsesResponse {
|
func derefFloat64(p *float64, def float64) float64 {
|
||||||
|
if p != nil {
|
||||||
|
return *p
|
||||||
|
}
|
||||||
|
return def
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToResponse converts an api.ChatResponse to a Responses API response.
|
||||||
|
// The request is used to echo back request parameters in the response.
|
||||||
|
func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse, request ResponsesRequest) ResponsesResponse {
|
||||||
var output []ResponsesOutputItem
|
var output []ResponsesOutputItem
|
||||||
|
|
||||||
// Add reasoning item if thinking is present
|
// Add reasoning item if thinking is present
|
||||||
@@ -585,6 +655,7 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse)
|
|||||||
output = append(output, ResponsesOutputItem{
|
output = append(output, ResponsesOutputItem{
|
||||||
ID: fmt.Sprintf("fc_%s_%d", responseID, i),
|
ID: fmt.Sprintf("fc_%s_%d", responseID, i),
|
||||||
Type: "function_call",
|
Type: "function_call",
|
||||||
|
Status: "completed",
|
||||||
CallID: tc.ID,
|
CallID: tc.ID,
|
||||||
Name: tc.Function.Name,
|
Name: tc.Function.Name,
|
||||||
Arguments: tc.Function.Arguments,
|
Arguments: tc.Function.Arguments,
|
||||||
@@ -600,23 +671,88 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse)
|
|||||||
{
|
{
|
||||||
Type: "output_text",
|
Type: "output_text",
|
||||||
Text: chatResponse.Message.Content,
|
Text: chatResponse.Message.Content,
|
||||||
|
Annotations: []any{},
|
||||||
|
Logprobs: []any{},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var instructions *string
|
||||||
|
if request.Instructions != "" {
|
||||||
|
instructions = &request.Instructions
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build truncation with default
|
||||||
|
truncation := "disabled"
|
||||||
|
if request.Truncation != nil {
|
||||||
|
truncation = *request.Truncation
|
||||||
|
}
|
||||||
|
|
||||||
|
tools := request.Tools
|
||||||
|
if tools == nil {
|
||||||
|
tools = []ResponsesTool{}
|
||||||
|
}
|
||||||
|
|
||||||
|
text := ResponsesTextField{
|
||||||
|
Format: ResponsesTextFormat{Type: "text"},
|
||||||
|
}
|
||||||
|
if request.Text != nil && request.Text.Format != nil {
|
||||||
|
text.Format = *request.Text.Format
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build reasoning output from request
|
||||||
|
var reasoning *ResponsesReasoningOutput
|
||||||
|
if request.Reasoning.Effort != "" || request.Reasoning.Summary != "" {
|
||||||
|
reasoning = &ResponsesReasoningOutput{}
|
||||||
|
if request.Reasoning.Effort != "" {
|
||||||
|
reasoning.Effort = &request.Reasoning.Effort
|
||||||
|
}
|
||||||
|
if request.Reasoning.Summary != "" {
|
||||||
|
reasoning.Summary = &request.Reasoning.Summary
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ResponsesResponse{
|
return ResponsesResponse{
|
||||||
ID: responseID,
|
ID: responseID,
|
||||||
Object: "response",
|
Object: "response",
|
||||||
CreatedAt: chatResponse.CreatedAt.Unix(),
|
CreatedAt: chatResponse.CreatedAt.Unix(),
|
||||||
|
CompletedAt: nil, // Set by middleware when writing final response
|
||||||
Status: "completed",
|
Status: "completed",
|
||||||
|
IncompleteDetails: nil, // Only populated if response incomplete
|
||||||
Model: model,
|
Model: model,
|
||||||
|
PreviousResponseID: nil, // Not supported
|
||||||
|
Instructions: instructions,
|
||||||
Output: output,
|
Output: output,
|
||||||
|
Error: nil, // Only populated on failure
|
||||||
|
Tools: tools,
|
||||||
|
ToolChoice: "auto", // Default value
|
||||||
|
Truncation: truncation,
|
||||||
|
ParallelToolCalls: true, // Default value
|
||||||
|
Text: text,
|
||||||
|
TopP: derefFloat64(request.TopP, 1.0),
|
||||||
|
PresencePenalty: 0, // Default value
|
||||||
|
FrequencyPenalty: 0, // Default value
|
||||||
|
TopLogprobs: 0, // Default value
|
||||||
|
Temperature: derefFloat64(request.Temperature, 1.0),
|
||||||
|
Reasoning: reasoning,
|
||||||
Usage: &ResponsesUsage{
|
Usage: &ResponsesUsage{
|
||||||
InputTokens: chatResponse.PromptEvalCount,
|
InputTokens: chatResponse.PromptEvalCount,
|
||||||
OutputTokens: chatResponse.EvalCount,
|
OutputTokens: chatResponse.EvalCount,
|
||||||
TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount,
|
TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount,
|
||||||
|
// TODO(drifkin): wire through the actual values
|
||||||
|
InputTokensDetails: ResponsesInputTokensDetails{CachedTokens: 0},
|
||||||
|
// TODO(drifkin): wire through the actual values
|
||||||
|
OutputTokensDetails: ResponsesOutputTokensDetails{ReasoningTokens: 0},
|
||||||
},
|
},
|
||||||
|
MaxOutputTokens: request.MaxOutputTokens,
|
||||||
|
MaxToolCalls: nil, // Not supported
|
||||||
|
Store: false, // We don't store responses
|
||||||
|
Background: request.Background,
|
||||||
|
ServiceTier: "default", // Default value
|
||||||
|
Metadata: map[string]any{},
|
||||||
|
SafetyIdentifier: nil, // Not supported
|
||||||
|
PromptCacheKey: nil, // Not supported
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -636,6 +772,7 @@ type ResponsesStreamConverter struct {
|
|||||||
responseID string
|
responseID string
|
||||||
itemID string
|
itemID string
|
||||||
model string
|
model string
|
||||||
|
request ResponsesRequest
|
||||||
|
|
||||||
// State tracking (mutated across Process calls)
|
// State tracking (mutated across Process calls)
|
||||||
firstWrite bool
|
firstWrite bool
|
||||||
@@ -668,11 +805,12 @@ func (c *ResponsesStreamConverter) newEvent(eventType string, data map[string]an
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewResponsesStreamConverter creates a new converter with the given configuration.
|
// NewResponsesStreamConverter creates a new converter with the given configuration.
|
||||||
func NewResponsesStreamConverter(responseID, itemID, model string) *ResponsesStreamConverter {
|
func NewResponsesStreamConverter(responseID, itemID, model string, request ResponsesRequest) *ResponsesStreamConverter {
|
||||||
return &ResponsesStreamConverter{
|
return &ResponsesStreamConverter{
|
||||||
responseID: responseID,
|
responseID: responseID,
|
||||||
itemID: itemID,
|
itemID: itemID,
|
||||||
model: model,
|
model: model,
|
||||||
|
request: request,
|
||||||
firstWrite: true,
|
firstWrite: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -717,25 +855,120 @@ func (c *ResponsesStreamConverter) Process(r api.ChatResponse) []ResponsesStream
|
|||||||
return events
|
return events
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent {
|
// buildResponseObject creates a full response object with all required fields for streaming events.
|
||||||
return c.newEvent("response.created", map[string]any{
|
func (c *ResponsesStreamConverter) buildResponseObject(status string, output []any, usage map[string]any) map[string]any {
|
||||||
"response": map[string]any{
|
var instructions any = nil
|
||||||
|
if c.request.Instructions != "" {
|
||||||
|
instructions = c.request.Instructions
|
||||||
|
}
|
||||||
|
|
||||||
|
truncation := "disabled"
|
||||||
|
if c.request.Truncation != nil {
|
||||||
|
truncation = *c.request.Truncation
|
||||||
|
}
|
||||||
|
|
||||||
|
var tools []any
|
||||||
|
if c.request.Tools != nil {
|
||||||
|
for _, t := range c.request.Tools {
|
||||||
|
tools = append(tools, map[string]any{
|
||||||
|
"type": t.Type,
|
||||||
|
"name": t.Name,
|
||||||
|
"description": t.Description,
|
||||||
|
"strict": t.Strict,
|
||||||
|
"parameters": t.Parameters,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if tools == nil {
|
||||||
|
tools = []any{}
|
||||||
|
}
|
||||||
|
|
||||||
|
textFormat := map[string]any{"type": "text"}
|
||||||
|
if c.request.Text != nil && c.request.Text.Format != nil {
|
||||||
|
textFormat = map[string]any{
|
||||||
|
"type": c.request.Text.Format.Type,
|
||||||
|
}
|
||||||
|
if c.request.Text.Format.Name != "" {
|
||||||
|
textFormat["name"] = c.request.Text.Format.Name
|
||||||
|
}
|
||||||
|
if c.request.Text.Format.Schema != nil {
|
||||||
|
textFormat["schema"] = c.request.Text.Format.Schema
|
||||||
|
}
|
||||||
|
if c.request.Text.Format.Strict != nil {
|
||||||
|
textFormat["strict"] = *c.request.Text.Format.Strict
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var reasoning any = nil
|
||||||
|
if c.request.Reasoning.Effort != "" || c.request.Reasoning.Summary != "" {
|
||||||
|
r := map[string]any{}
|
||||||
|
if c.request.Reasoning.Effort != "" {
|
||||||
|
r["effort"] = c.request.Reasoning.Effort
|
||||||
|
} else {
|
||||||
|
r["effort"] = nil
|
||||||
|
}
|
||||||
|
if c.request.Reasoning.Summary != "" {
|
||||||
|
r["summary"] = c.request.Reasoning.Summary
|
||||||
|
} else {
|
||||||
|
r["summary"] = nil
|
||||||
|
}
|
||||||
|
reasoning = r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build top_p and temperature with defaults
|
||||||
|
topP := 1.0
|
||||||
|
if c.request.TopP != nil {
|
||||||
|
topP = *c.request.TopP
|
||||||
|
}
|
||||||
|
temperature := 1.0
|
||||||
|
if c.request.Temperature != nil {
|
||||||
|
temperature = *c.request.Temperature
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[string]any{
|
||||||
"id": c.responseID,
|
"id": c.responseID,
|
||||||
"object": "response",
|
"object": "response",
|
||||||
"status": "in_progress",
|
"created_at": time.Now().Unix(),
|
||||||
"output": []any{},
|
"completed_at": nil,
|
||||||
},
|
"status": status,
|
||||||
|
"incomplete_details": nil,
|
||||||
|
"model": c.model,
|
||||||
|
"previous_response_id": nil,
|
||||||
|
"instructions": instructions,
|
||||||
|
"output": output,
|
||||||
|
"error": nil,
|
||||||
|
"tools": tools,
|
||||||
|
"tool_choice": "auto",
|
||||||
|
"truncation": truncation,
|
||||||
|
"parallel_tool_calls": true,
|
||||||
|
"text": map[string]any{"format": textFormat},
|
||||||
|
"top_p": topP,
|
||||||
|
"presence_penalty": 0,
|
||||||
|
"frequency_penalty": 0,
|
||||||
|
"top_logprobs": 0,
|
||||||
|
"temperature": temperature,
|
||||||
|
"reasoning": reasoning,
|
||||||
|
"usage": usage,
|
||||||
|
"max_output_tokens": c.request.MaxOutputTokens,
|
||||||
|
"max_tool_calls": nil,
|
||||||
|
"store": false,
|
||||||
|
"background": c.request.Background,
|
||||||
|
"service_tier": "default",
|
||||||
|
"metadata": map[string]any{},
|
||||||
|
"safety_identifier": nil,
|
||||||
|
"prompt_cache_key": nil,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent {
|
||||||
|
return c.newEvent("response.created", map[string]any{
|
||||||
|
"response": c.buildResponseObject("in_progress", []any{}, nil),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent {
|
func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent {
|
||||||
return c.newEvent("response.in_progress", map[string]any{
|
return c.newEvent("response.in_progress", map[string]any{
|
||||||
"response": map[string]any{
|
"response": c.buildResponseObject("in_progress", []any{}, nil),
|
||||||
"id": c.responseID,
|
|
||||||
"object": "response",
|
|
||||||
"status": "in_progress",
|
|
||||||
"output": []any{},
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -764,6 +997,7 @@ func (c *ResponsesStreamConverter) processThinking(thinking string) []ResponsesS
|
|||||||
events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{
|
events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{
|
||||||
"item_id": c.reasoningItemID,
|
"item_id": c.reasoningItemID,
|
||||||
"output_index": c.outputIndex,
|
"output_index": c.outputIndex,
|
||||||
|
"summary_index": 0,
|
||||||
"delta": thinking,
|
"delta": thinking,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
@@ -785,6 +1019,7 @@ func (c *ResponsesStreamConverter) finishReasoning() []ResponsesStreamEvent {
|
|||||||
c.newEvent("response.reasoning_summary_text.done", map[string]any{
|
c.newEvent("response.reasoning_summary_text.done", map[string]any{
|
||||||
"item_id": c.reasoningItemID,
|
"item_id": c.reasoningItemID,
|
||||||
"output_index": c.outputIndex,
|
"output_index": c.outputIndex,
|
||||||
|
"summary_index": 0,
|
||||||
"text": c.accumulatedThinking,
|
"text": c.accumulatedThinking,
|
||||||
}),
|
}),
|
||||||
c.newEvent("response.output_item.done", map[string]any{
|
c.newEvent("response.output_item.done", map[string]any{
|
||||||
@@ -900,6 +1135,8 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response
|
|||||||
"part": map[string]any{
|
"part": map[string]any{
|
||||||
"type": "output_text",
|
"type": "output_text",
|
||||||
"text": "",
|
"text": "",
|
||||||
|
"annotations": []any{},
|
||||||
|
"logprobs": []any{},
|
||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@@ -913,6 +1150,7 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response
|
|||||||
"output_index": c.outputIndex,
|
"output_index": c.outputIndex,
|
||||||
"content_index": 0,
|
"content_index": 0,
|
||||||
"delta": content,
|
"delta": content,
|
||||||
|
"logprobs": []any{},
|
||||||
}))
|
}))
|
||||||
|
|
||||||
return events
|
return events
|
||||||
@@ -946,6 +1184,8 @@ func (c *ResponsesStreamConverter) buildFinalOutput() []any {
|
|||||||
"content": []map[string]any{{
|
"content": []map[string]any{{
|
||||||
"type": "output_text",
|
"type": "output_text",
|
||||||
"text": c.accumulatedText,
|
"text": c.accumulatedText,
|
||||||
|
"annotations": []any{},
|
||||||
|
"logprobs": []any{},
|
||||||
}},
|
}},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -967,6 +1207,7 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
|
|||||||
"output_index": c.outputIndex,
|
"output_index": c.outputIndex,
|
||||||
"content_index": 0,
|
"content_index": 0,
|
||||||
"text": c.accumulatedText,
|
"text": c.accumulatedText,
|
||||||
|
"logprobs": []any{},
|
||||||
}))
|
}))
|
||||||
|
|
||||||
// response.content_part.done
|
// response.content_part.done
|
||||||
@@ -977,6 +1218,8 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
|
|||||||
"part": map[string]any{
|
"part": map[string]any{
|
||||||
"type": "output_text",
|
"type": "output_text",
|
||||||
"text": c.accumulatedText,
|
"text": c.accumulatedText,
|
||||||
|
"annotations": []any{},
|
||||||
|
"logprobs": []any{},
|
||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
|
|
||||||
@@ -991,24 +1234,29 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
|
|||||||
"content": []map[string]any{{
|
"content": []map[string]any{{
|
||||||
"type": "output_text",
|
"type": "output_text",
|
||||||
"text": c.accumulatedText,
|
"text": c.accumulatedText,
|
||||||
|
"annotations": []any{},
|
||||||
|
"logprobs": []any{},
|
||||||
}},
|
}},
|
||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
// response.completed
|
// response.completed
|
||||||
events = append(events, c.newEvent("response.completed", map[string]any{
|
usage := map[string]any{
|
||||||
"response": map[string]any{
|
|
||||||
"id": c.responseID,
|
|
||||||
"object": "response",
|
|
||||||
"status": "completed",
|
|
||||||
"output": c.buildFinalOutput(),
|
|
||||||
"usage": map[string]any{
|
|
||||||
"input_tokens": r.PromptEvalCount,
|
"input_tokens": r.PromptEvalCount,
|
||||||
"output_tokens": r.EvalCount,
|
"output_tokens": r.EvalCount,
|
||||||
"total_tokens": r.PromptEvalCount + r.EvalCount,
|
"total_tokens": r.PromptEvalCount + r.EvalCount,
|
||||||
|
"input_tokens_details": map[string]any{
|
||||||
|
"cached_tokens": 0,
|
||||||
},
|
},
|
||||||
|
"output_tokens_details": map[string]any{
|
||||||
|
"reasoning_tokens": 0,
|
||||||
},
|
},
|
||||||
|
}
|
||||||
|
response := c.buildResponseObject("completed", c.buildFinalOutput(), usage)
|
||||||
|
response["completed_at"] = time.Now().Unix()
|
||||||
|
events = append(events, c.newEvent("response.completed", map[string]any{
|
||||||
|
"response": response,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
return events
|
return events
|
||||||
|
|||||||
@@ -850,7 +850,7 @@ func TestFromResponsesRequest_Images(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestResponsesStreamConverter_TextOnly(t *testing.T) {
|
func TestResponsesStreamConverter_TextOnly(t *testing.T) {
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
// First chunk with content
|
// First chunk with content
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
@@ -916,7 +916,7 @@ func TestResponsesStreamConverter_TextOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestResponsesStreamConverter_ToolCalls(t *testing.T) {
|
func TestResponsesStreamConverter_ToolCalls(t *testing.T) {
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
Message: api.Message{
|
Message: api.Message{
|
||||||
@@ -952,7 +952,7 @@ func TestResponsesStreamConverter_ToolCalls(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestResponsesStreamConverter_Reasoning(t *testing.T) {
|
func TestResponsesStreamConverter_Reasoning(t *testing.T) {
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
// First chunk with thinking
|
// First chunk with thinking
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
@@ -1267,7 +1267,7 @@ func TestToResponse_WithReasoning(t *testing.T) {
|
|||||||
Content: "The answer is 42",
|
Content: "The answer is 42",
|
||||||
},
|
},
|
||||||
Done: true,
|
Done: true,
|
||||||
})
|
}, ResponsesRequest{})
|
||||||
|
|
||||||
// Should have 2 output items: reasoning + message
|
// Should have 2 output items: reasoning + message
|
||||||
if len(response.Output) != 2 {
|
if len(response.Output) != 2 {
|
||||||
@@ -1638,7 +1638,7 @@ func TestFromResponsesRequest_ShorthandFormats(t *testing.T) {
|
|||||||
|
|
||||||
func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
|
func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
|
||||||
// Verify that response.output_item.done includes content field for messages
|
// Verify that response.output_item.done includes content field for messages
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
// First chunk
|
// First chunk
|
||||||
converter.Process(api.ChatResponse{
|
converter.Process(api.ChatResponse{
|
||||||
@@ -1686,7 +1686,7 @@ func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
|
|||||||
|
|
||||||
func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) {
|
func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) {
|
||||||
// Verify that response.completed includes the output array
|
// Verify that response.completed includes the output array
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
// Process some content
|
// Process some content
|
||||||
converter.Process(api.ChatResponse{
|
converter.Process(api.ChatResponse{
|
||||||
@@ -1730,7 +1730,7 @@ func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T)
|
|||||||
|
|
||||||
func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
|
func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
|
||||||
// Verify that response.created includes an empty output array
|
// Verify that response.created includes an empty output array
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
Message: api.Message{Content: "Hi"},
|
Message: api.Message{Content: "Hi"},
|
||||||
@@ -1757,7 +1757,7 @@ func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
|
|||||||
|
|
||||||
func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
|
func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
|
||||||
// Verify that events include incrementing sequence numbers
|
// Verify that events include incrementing sequence numbers
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
Message: api.Message{Content: "Hello"},
|
Message: api.Message{Content: "Hello"},
|
||||||
@@ -1791,7 +1791,7 @@ func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
|
|||||||
|
|
||||||
func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) {
|
func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) {
|
||||||
// Verify that function call items include status field
|
// Verify that function call items include status field
|
||||||
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
|
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{})
|
||||||
|
|
||||||
events := converter.Process(api.ChatResponse{
|
events := converter.Process(api.ChatResponse{
|
||||||
Message: api.Message{
|
Message: api.Message{
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Prompt struct {
|
type Prompt struct {
|
||||||
@@ -40,6 +41,7 @@ type Instance struct {
|
|||||||
Terminal *Terminal
|
Terminal *Terminal
|
||||||
History *History
|
History *History
|
||||||
Pasting bool
|
Pasting bool
|
||||||
|
pastedLines []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(prompt Prompt) (*Instance, error) {
|
func New(prompt Prompt) (*Instance, error) {
|
||||||
@@ -174,6 +176,8 @@ func (i *Instance) Readline() (string, error) {
|
|||||||
case CharEsc:
|
case CharEsc:
|
||||||
esc = true
|
esc = true
|
||||||
case CharInterrupt:
|
case CharInterrupt:
|
||||||
|
i.pastedLines = nil
|
||||||
|
i.Prompt.UseAlt = false
|
||||||
return "", ErrInterrupt
|
return "", ErrInterrupt
|
||||||
case CharPrev:
|
case CharPrev:
|
||||||
i.historyPrev(buf, ¤tLineBuf)
|
i.historyPrev(buf, ¤tLineBuf)
|
||||||
@@ -188,7 +192,23 @@ func (i *Instance) Readline() (string, error) {
|
|||||||
case CharForward:
|
case CharForward:
|
||||||
buf.MoveRight()
|
buf.MoveRight()
|
||||||
case CharBackspace, CharCtrlH:
|
case CharBackspace, CharCtrlH:
|
||||||
|
if buf.IsEmpty() && len(i.pastedLines) > 0 {
|
||||||
|
lastIdx := len(i.pastedLines) - 1
|
||||||
|
prevLine := i.pastedLines[lastIdx]
|
||||||
|
i.pastedLines = i.pastedLines[:lastIdx]
|
||||||
|
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + ClearToEOL)
|
||||||
|
if len(i.pastedLines) == 0 {
|
||||||
|
fmt.Print(i.Prompt.Prompt)
|
||||||
|
i.Prompt.UseAlt = false
|
||||||
|
} else {
|
||||||
|
fmt.Print(i.Prompt.AltPrompt)
|
||||||
|
}
|
||||||
|
for _, r := range prevLine {
|
||||||
|
buf.Add(r)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
buf.Remove()
|
buf.Remove()
|
||||||
|
}
|
||||||
case CharTab:
|
case CharTab:
|
||||||
// todo: convert back to real tabs
|
// todo: convert back to real tabs
|
||||||
for range 8 {
|
for range 8 {
|
||||||
@@ -211,13 +231,28 @@ func (i *Instance) Readline() (string, error) {
|
|||||||
case CharCtrlZ:
|
case CharCtrlZ:
|
||||||
fd := os.Stdin.Fd()
|
fd := os.Stdin.Fd()
|
||||||
return handleCharCtrlZ(fd, i.Terminal.termios)
|
return handleCharCtrlZ(fd, i.Terminal.termios)
|
||||||
case CharEnter, CharCtrlJ:
|
case CharCtrlJ:
|
||||||
|
i.pastedLines = append(i.pastedLines, buf.String())
|
||||||
|
buf.Buf.Clear()
|
||||||
|
buf.Pos = 0
|
||||||
|
buf.DisplayPos = 0
|
||||||
|
buf.LineHasSpace.Clear()
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Print(i.Prompt.AltPrompt)
|
||||||
|
i.Prompt.UseAlt = true
|
||||||
|
continue
|
||||||
|
case CharEnter:
|
||||||
output := buf.String()
|
output := buf.String()
|
||||||
|
if len(i.pastedLines) > 0 {
|
||||||
|
output = strings.Join(i.pastedLines, "\n") + "\n" + output
|
||||||
|
i.pastedLines = nil
|
||||||
|
}
|
||||||
if output != "" {
|
if output != "" {
|
||||||
i.History.Add(output)
|
i.History.Add(output)
|
||||||
}
|
}
|
||||||
buf.MoveToEnd()
|
buf.MoveToEnd()
|
||||||
fmt.Println()
|
fmt.Println()
|
||||||
|
i.Prompt.UseAlt = false
|
||||||
|
|
||||||
return output, nil
|
return output, nil
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ import (
|
|||||||
"github.com/ollama/ollama/model/input"
|
"github.com/ollama/ollama/model/input"
|
||||||
"github.com/ollama/ollama/runner/common"
|
"github.com/ollama/ollama/runner/common"
|
||||||
"github.com/ollama/ollama/sample"
|
"github.com/ollama/ollama/sample"
|
||||||
|
"github.com/ollama/ollama/tokenizers"
|
||||||
|
|
||||||
_ "github.com/ollama/ollama/model/models"
|
_ "github.com/ollama/ollama/model/models"
|
||||||
)
|
)
|
||||||
@@ -210,9 +211,9 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
|
|||||||
}
|
}
|
||||||
|
|
||||||
// calculateLogprobs converts raw logits to log probabilities and finds top K tokens
|
// calculateLogprobs converts raw logits to log probabilities and finds top K tokens
|
||||||
func calculateLogprobs(logits []float32, selectedToken int32, topK int, textProcessor model.TextProcessor) []llm.Logprob {
|
func calculateLogprobs(logits []float32, selectedToken int32, topK int, tokenizer tokenizers.Tokenizer) []llm.Logprob {
|
||||||
decoder := func(tokenID int) string {
|
decoder := func(tokenID int) string {
|
||||||
text, _ := textProcessor.Decode([]int32{int32(tokenID)})
|
text, _ := tokenizer.Decode([]int32{int32(tokenID)})
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
return common.CalculateLogprobs(logits, int(selectedToken), topK, decoder)
|
return common.CalculateLogprobs(logits, int(selectedToken), topK, decoder)
|
||||||
@@ -242,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,
|
|||||||
|
|
||||||
for i, part := range parts {
|
for i, part := range parts {
|
||||||
// text - tokenize
|
// text - tokenize
|
||||||
tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
|
tokens, err := s.model.(tokenizers.Tokenizer).Encode(part, i == 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, nil, err
|
return nil, nil, nil, err
|
||||||
}
|
}
|
||||||
@@ -766,7 +767,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
|
|||||||
nextBatchTokens[i].Token = token
|
nextBatchTokens[i].Token = token
|
||||||
|
|
||||||
// if it's an end of sequence token, break
|
// if it's an end of sequence token, break
|
||||||
if s.model.(model.TextProcessor).Is(token, model.SpecialEOS) {
|
if s.model.(tokenizers.Tokenizer).Is(token, tokenizers.SpecialEOS) {
|
||||||
// TODO (jmorganca): we should send this back
|
// TODO (jmorganca): we should send this back
|
||||||
// as it's important for the /api/generate context
|
// as it's important for the /api/generate context
|
||||||
// seq.responses <- piece
|
// seq.responses <- piece
|
||||||
@@ -775,14 +776,14 @@ func (s *Server) computeBatch(activeBatch batchState) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
piece, err := s.model.(model.TextProcessor).Decode([]int32{token})
|
piece, err := s.model.(tokenizers.Tokenizer).Decode([]int32{token})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic("failed to decode token")
|
panic("failed to decode token")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate logprobs if requested (after EOS check to avoid logprobs for EOS tokens)
|
// Calculate logprobs if requested (after EOS check to avoid logprobs for EOS tokens)
|
||||||
if seq.logprobs {
|
if seq.logprobs {
|
||||||
logprobs := calculateLogprobs(logits, token, seq.topLogprobs, s.model.(model.TextProcessor))
|
logprobs := calculateLogprobs(logits, token, seq.topLogprobs, s.model.(tokenizers.Tokenizer))
|
||||||
seq.pendingLogprobs = append(seq.pendingLogprobs, logprobs...)
|
seq.pendingLogprobs = append(seq.pendingLogprobs, logprobs...)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -873,7 +874,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
var grammar *sample.GrammarSampler
|
var grammar *sample.GrammarSampler
|
||||||
var err error
|
var err error
|
||||||
if req.Grammar != "" {
|
if req.Grammar != "" {
|
||||||
grammar, err = sample.NewGrammarSampler(s.model.(model.TextProcessor), req.Grammar)
|
grammar, err = sample.NewGrammarSampler(s.model.(tokenizers.Tokenizer), req.Grammar)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, "failed to load model vocabulary required for format", http.StatusInternalServerError)
|
http.Error(w, "failed to load model vocabulary required for format", http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
|
|
||||||
"github.com/ollama/ollama/llama"
|
"github.com/ollama/ollama/llama"
|
||||||
"github.com/ollama/ollama/model"
|
"github.com/ollama/ollama/tokenizers"
|
||||||
)
|
)
|
||||||
|
|
||||||
// token represents information about a single token during sampling
|
// token represents information about a single token during sampling
|
||||||
@@ -168,15 +168,15 @@ type GrammarSampler struct {
|
|||||||
grammar *llama.Grammar
|
grammar *llama.Grammar
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewGrammarSampler(model model.TextProcessor, grammarStr string) (*GrammarSampler, error) {
|
func NewGrammarSampler(tokenizer tokenizers.Tokenizer, grammarStr string) (*GrammarSampler, error) {
|
||||||
vocabIds := make([]uint32, len(model.Vocabulary().Values))
|
vocabIds := make([]uint32, len(tokenizer.Vocabulary().Values))
|
||||||
pieces := make([]string, len(model.Vocabulary().Values))
|
pieces := make([]string, len(tokenizer.Vocabulary().Values))
|
||||||
for i := range model.Vocabulary().Values {
|
for i := range tokenizer.Vocabulary().Values {
|
||||||
pieces[i], _ = model.Decode([]int32{int32(i)})
|
pieces[i], _ = tokenizer.Decode([]int32{int32(i)})
|
||||||
vocabIds[i] = uint32(i)
|
vocabIds[i] = uint32(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
grammar := llama.NewGrammar(grammarStr, vocabIds, pieces, model.Vocabulary().EOS)
|
grammar := llama.NewGrammar(grammarStr, vocabIds, pieces, tokenizer.Vocabulary().EOS)
|
||||||
if grammar == nil {
|
if grammar == nil {
|
||||||
return nil, errors.New("sample: failed to initialize grammar")
|
return nil, errors.New("sample: failed to initialize grammar")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ _build_darwin() {
|
|||||||
cmake --install $BUILD_DIR --component MLX
|
cmake --install $BUILD_DIR --component MLX
|
||||||
# Override CGO flags to point to the amd64 build directory
|
# Override CGO flags to point to the amd64 build directory
|
||||||
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
|
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
|
||||||
MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Accelerate -mmacosx-version-min=14.0"
|
MLX_CGO_LDFLAGS="-ldl -lc++ -framework Accelerate -mmacosx-version-min=14.0"
|
||||||
else
|
else
|
||||||
BUILD_DIR=build
|
BUILD_DIR=build
|
||||||
cmake --preset MLX \
|
cmake --preset MLX \
|
||||||
@@ -71,10 +71,12 @@ _build_darwin() {
|
|||||||
cmake --install $BUILD_DIR --component MLX
|
cmake --install $BUILD_DIR --component MLX
|
||||||
# Use default CGO flags from mlx.go for arm64
|
# Use default CGO flags from mlx.go for arm64
|
||||||
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
|
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
|
||||||
MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
|
MLX_CGO_LDFLAGS="-lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
|
||||||
fi
|
fi
|
||||||
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX/ollama-mlx .
|
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX .
|
||||||
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 go build -o $INSTALL_PREFIX .
|
# Copy MLX libraries to same directory as executable for dlopen
|
||||||
|
cp $INSTALL_PREFIX/lib/ollama/libmlxc.dylib $INSTALL_PREFIX/
|
||||||
|
cp $INSTALL_PREFIX/lib/ollama/libmlx.dylib $INSTALL_PREFIX/
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,12 +84,10 @@ _sign_darwin() {
|
|||||||
status "Creating universal binary..."
|
status "Creating universal binary..."
|
||||||
mkdir -p dist/darwin
|
mkdir -p dist/darwin
|
||||||
lipo -create -output dist/darwin/ollama dist/darwin-*/ollama
|
lipo -create -output dist/darwin/ollama dist/darwin-*/ollama
|
||||||
lipo -create -output dist/darwin/ollama-mlx dist/darwin-*/ollama-mlx
|
|
||||||
chmod +x dist/darwin/ollama
|
chmod +x dist/darwin/ollama
|
||||||
chmod +x dist/darwin/ollama-mlx
|
|
||||||
|
|
||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
for F in dist/darwin/ollama dist/darwin-*/lib/ollama/* dist/darwin/ollama-mlx; do
|
for F in dist/darwin/ollama dist/darwin-*/lib/ollama/*; do
|
||||||
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
|
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -154,7 +154,6 @@ _build_macapp() {
|
|||||||
mkdir -p dist/Ollama.app/Contents/Resources
|
mkdir -p dist/Ollama.app/Contents/Resources
|
||||||
if [ -d dist/darwin-amd64 ]; then
|
if [ -d dist/darwin-amd64 ]; then
|
||||||
lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
|
lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
|
||||||
lipo -create -output dist/Ollama.app/Contents/Resources/ollama-mlx dist/darwin-amd64/ollama-mlx dist/darwin-arm64/ollama-mlx
|
|
||||||
for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do
|
for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do
|
||||||
lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F)
|
lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F)
|
||||||
done
|
done
|
||||||
@@ -166,28 +165,27 @@ _build_macapp() {
|
|||||||
cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama
|
cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama
|
||||||
cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/
|
cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/
|
||||||
fi
|
fi
|
||||||
cp -a dist/darwin/ollama-mlx dist/Ollama.app/Contents/Resources/ollama-mlx
|
|
||||||
chmod a+x dist/Ollama.app/Contents/Resources/ollama
|
chmod a+x dist/Ollama.app/Contents/Resources/ollama
|
||||||
|
|
||||||
# Sign
|
# Sign
|
||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama
|
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama
|
||||||
for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib dist/Ollama.app/Contents/Resources/ollama-mlx ; do
|
for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib ; do
|
||||||
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib}
|
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib}
|
||||||
done
|
done
|
||||||
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app
|
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app
|
||||||
fi
|
fi
|
||||||
|
|
||||||
rm -f dist/Ollama-darwin.zip
|
rm -f dist/Ollama-darwin.zip
|
||||||
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
||||||
(cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
|
(cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
|
||||||
|
|
||||||
# Notarize and Staple
|
# Notarize and Staple
|
||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
$(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
|
$(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
|
||||||
rm -f dist/Ollama-darwin.zip
|
rm -f dist/Ollama-darwin.zip
|
||||||
$(xcrun -f stapler) staple dist/Ollama.app
|
$(xcrun -f stapler) staple dist/Ollama.app
|
||||||
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
||||||
|
|
||||||
rm -f dist/Ollama.dmg
|
rm -f dist/Ollama.dmg
|
||||||
|
|
||||||
|
|||||||
@@ -50,12 +50,17 @@ func (r registryChallenge) URL() (*url.URL, error) {
|
|||||||
return redirectURL, nil
|
return redirectURL, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getAuthorizationToken(ctx context.Context, challenge registryChallenge) (string, error) {
|
func getAuthorizationToken(ctx context.Context, challenge registryChallenge, originalHost string) (string, error) {
|
||||||
redirectURL, err := challenge.URL()
|
redirectURL, err := challenge.URL()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate that the realm host matches the original request host to prevent sending tokens cross-origin.
|
||||||
|
if redirectURL.Host != originalHost {
|
||||||
|
return "", fmt.Errorf("realm host %q does not match original host %q", redirectURL.Host, originalHost)
|
||||||
|
}
|
||||||
|
|
||||||
sha256sum := sha256.Sum256(nil)
|
sha256sum := sha256.Sum256(nil)
|
||||||
data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:])))))
|
data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:])))))
|
||||||
|
|
||||||
|
|||||||
113
server/auth_test.go
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetAuthorizationTokenRejectsCrossDomain(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
realm string
|
||||||
|
originalHost string
|
||||||
|
wantMismatch bool
|
||||||
|
}{
|
||||||
|
{"https://example.com/token", "example.com", false},
|
||||||
|
{"https://example.com/token", "other.com", true},
|
||||||
|
{"https://example.com/token", "localhost:8000", true},
|
||||||
|
{"https://localhost:5000/token", "localhost:5000", false},
|
||||||
|
{"https://localhost:5000/token", "localhost:6000", true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.originalHost, func(t *testing.T) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
challenge := registryChallenge{Realm: tt.realm, Service: "test", Scope: "repo:x:pull"}
|
||||||
|
_, err := getAuthorizationToken(ctx, challenge, tt.originalHost)
|
||||||
|
|
||||||
|
isMismatch := err != nil && strings.Contains(err.Error(), "does not match")
|
||||||
|
if tt.wantMismatch && !isMismatch {
|
||||||
|
t.Errorf("expected domain mismatch error, got: %v", err)
|
||||||
|
}
|
||||||
|
if !tt.wantMismatch && isMismatch {
|
||||||
|
t.Errorf("unexpected domain mismatch error: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRegistryChallenge(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
wantRealm, wantService, wantScope string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
`Bearer realm="https://auth.example.com/token",service="registry",scope="repo:foo:pull"`,
|
||||||
|
"https://auth.example.com/token", "registry", "repo:foo:pull",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
`Bearer realm="https://r.ollama.ai/v2/token",service="ollama",scope="-"`,
|
||||||
|
"https://r.ollama.ai/v2/token", "ollama", "-",
|
||||||
|
},
|
||||||
|
{"", "", "", ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
result := parseRegistryChallenge(tt.input)
|
||||||
|
if result.Realm != tt.wantRealm || result.Service != tt.wantService || result.Scope != tt.wantScope {
|
||||||
|
t.Errorf("parseRegistryChallenge(%q) = {%q, %q, %q}, want {%q, %q, %q}",
|
||||||
|
tt.input, result.Realm, result.Service, result.Scope,
|
||||||
|
tt.wantRealm, tt.wantService, tt.wantScope)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRegistryChallengeURL(t *testing.T) {
|
||||||
|
challenge := registryChallenge{
|
||||||
|
Realm: "https://auth.example.com/token",
|
||||||
|
Service: "registry",
|
||||||
|
Scope: "repo:foo:pull repo:bar:push",
|
||||||
|
}
|
||||||
|
|
||||||
|
u, err := challenge.URL()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("URL() error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if u.Host != "auth.example.com" {
|
||||||
|
t.Errorf("host = %q, want %q", u.Host, "auth.example.com")
|
||||||
|
}
|
||||||
|
if u.Path != "/token" {
|
||||||
|
t.Errorf("path = %q, want %q", u.Path, "/token")
|
||||||
|
}
|
||||||
|
|
||||||
|
q := u.Query()
|
||||||
|
if q.Get("service") != "registry" {
|
||||||
|
t.Errorf("service = %q, want %q", q.Get("service"), "registry")
|
||||||
|
}
|
||||||
|
if scopes := q["scope"]; len(scopes) != 2 {
|
||||||
|
t.Errorf("scope count = %d, want 2", len(scopes))
|
||||||
|
}
|
||||||
|
if q.Get("ts") == "" {
|
||||||
|
t.Error("missing ts")
|
||||||
|
}
|
||||||
|
if q.Get("nonce") == "" {
|
||||||
|
t.Error("missing nonce")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nonces should differ between calls
|
||||||
|
u2, _ := challenge.URL()
|
||||||
|
if q.Get("nonce") == u2.Query().Get("nonce") {
|
||||||
|
t.Error("nonce should be unique per call")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRegistryChallengeURLInvalid(t *testing.T) {
|
||||||
|
challenge := registryChallenge{Realm: "://invalid"}
|
||||||
|
if _, err := challenge.URL(); err == nil {
|
||||||
|
t.Error("expected error for invalid URL")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -41,6 +41,7 @@ var (
|
|||||||
errCapabilityVision = errors.New("vision")
|
errCapabilityVision = errors.New("vision")
|
||||||
errCapabilityEmbedding = errors.New("embedding")
|
errCapabilityEmbedding = errors.New("embedding")
|
||||||
errCapabilityThinking = errors.New("thinking")
|
errCapabilityThinking = errors.New("thinking")
|
||||||
|
errCapabilityImage = errors.New("image generation")
|
||||||
errInsecureProtocol = errors.New("insecure protocol http")
|
errInsecureProtocol = errors.New("insecure protocol http")
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -76,7 +77,7 @@ func (m *Model) Capabilities() []model.Capability {
|
|||||||
|
|
||||||
// Check for image generation model via config capabilities
|
// Check for image generation model via config capabilities
|
||||||
if slices.Contains(m.Config.Capabilities, "image") {
|
if slices.Contains(m.Config.Capabilities, "image") {
|
||||||
return []model.Capability{model.CapabilityImageGeneration}
|
return []model.Capability{model.CapabilityImage}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for completion capability
|
// Check for completion capability
|
||||||
@@ -159,6 +160,7 @@ func (m *Model) CheckCapabilities(want ...model.Capability) error {
|
|||||||
model.CapabilityVision: errCapabilityVision,
|
model.CapabilityVision: errCapabilityVision,
|
||||||
model.CapabilityEmbedding: errCapabilityEmbedding,
|
model.CapabilityEmbedding: errCapabilityEmbedding,
|
||||||
model.CapabilityThinking: errCapabilityThinking,
|
model.CapabilityThinking: errCapabilityThinking,
|
||||||
|
model.CapabilityImage: errCapabilityImage,
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, cap := range want {
|
for _, cap := range want {
|
||||||
@@ -775,7 +777,7 @@ func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
|
|||||||
Realm: challenge.Realm,
|
Realm: challenge.Realm,
|
||||||
Service: challenge.Service,
|
Service: challenge.Service,
|
||||||
Scope: challenge.Scope,
|
Scope: challenge.Scope,
|
||||||
})
|
}, base.Host)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := transfer.Download(ctx, transfer.DownloadOptions{
|
if err := transfer.Download(ctx, transfer.DownloadOptions{
|
||||||
@@ -850,7 +852,7 @@ func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
|
|||||||
Realm: challenge.Realm,
|
Realm: challenge.Realm,
|
||||||
Service: challenge.Service,
|
Service: challenge.Service,
|
||||||
Scope: challenge.Scope,
|
Scope: challenge.Scope,
|
||||||
})
|
}, base.Host)
|
||||||
}
|
}
|
||||||
|
|
||||||
return transfer.Upload(ctx, transfer.UploadOptions{
|
return transfer.Upload(ctx, transfer.UploadOptions{
|
||||||
@@ -916,7 +918,7 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
|||||||
|
|
||||||
// Handle authentication error with one retry
|
// Handle authentication error with one retry
|
||||||
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
||||||
token, err := getAuthorizationToken(ctx, challenge)
|
token, err := getAuthorizationToken(ctx, challenge, requestURL.Host)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ func TestModelCapabilities(t *testing.T) {
|
|||||||
Capabilities: []string{"image"},
|
Capabilities: []string{"image"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectedCaps: []model.Capability{model.CapabilityImageGeneration},
|
expectedCaps: []model.Capability{model.CapabilityImage},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "model with completion capability",
|
name: "model with completion capability",
|
||||||
@@ -242,6 +242,24 @@ func TestModelCheckCapabilities(t *testing.T) {
|
|||||||
checkCaps: []model.Capability{"unknown"},
|
checkCaps: []model.Capability{"unknown"},
|
||||||
expectedErrMsg: "unknown capability",
|
expectedErrMsg: "unknown capability",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "model missing image generation capability",
|
||||||
|
model: Model{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
Template: chatTemplate,
|
||||||
|
},
|
||||||
|
checkCaps: []model.Capability{model.CapabilityImage},
|
||||||
|
expectedErrMsg: "does not support image generation",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with image generation capability",
|
||||||
|
model: Model{
|
||||||
|
Config: model.ConfigV2{
|
||||||
|
Capabilities: []string{"image"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
checkCaps: []model.Capability{model.CapabilityImage},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
|
|||||||
170
server/routes.go
@@ -51,7 +51,7 @@ import (
|
|||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
"github.com/ollama/ollama/x/imagegen"
|
"github.com/ollama/ollama/x/imagegen"
|
||||||
imagegenapi "github.com/ollama/ollama/x/imagegen/api"
|
xserver "github.com/ollama/ollama/x/server"
|
||||||
)
|
)
|
||||||
|
|
||||||
const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"
|
const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"
|
||||||
@@ -164,29 +164,6 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
|
|||||||
return runner.llama, model, &opts, nil
|
return runner.llama, model, &opts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScheduleImageGenRunner schedules an image generation model runner.
|
|
||||||
// This implements the imagegenapi.RunnerScheduler interface.
|
|
||||||
func (s *Server) ScheduleImageGenRunner(c *gin.Context, modelName string, opts api.Options, keepAlive *api.Duration) (llm.LlamaServer, error) {
|
|
||||||
m := &Model{
|
|
||||||
Name: modelName,
|
|
||||||
ShortName: modelName,
|
|
||||||
ModelPath: modelName, // For image gen, ModelPath is just the model name
|
|
||||||
Config: model.ConfigV2{
|
|
||||||
Capabilities: []string{"image"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
runnerCh, errCh := s.sched.GetRunner(c.Request.Context(), m, opts, keepAlive)
|
|
||||||
var runner *runnerRef
|
|
||||||
select {
|
|
||||||
case runner = <-runnerCh:
|
|
||||||
case err := <-errCh:
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return runner.llama, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func signinURL() (string, error) {
|
func signinURL() (string, error) {
|
||||||
pubKey, err := auth.GetPublicKey()
|
pubKey, err := auth.GetPublicKey()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -214,12 +191,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if this is a known image generation model
|
|
||||||
if imagegen.ResolveModelName(req.Model) != "" {
|
|
||||||
imagegenapi.HandleGenerateRequest(c, s, req.Model, req.Prompt, req.KeepAlive, streamResponse)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
name := model.ParseName(req.Model)
|
name := model.ParseName(req.Model)
|
||||||
if !name.IsValid() {
|
if !name.IsValid() {
|
||||||
// Ideally this is "invalid model name" but we're keeping with
|
// Ideally this is "invalid model name" but we're keeping with
|
||||||
@@ -249,6 +220,12 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle image generation models
|
||||||
|
if slices.Contains(m.Capabilities(), model.CapabilityImage) {
|
||||||
|
s.handleImageGenerate(c, req, name.String(), checkpointStart)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if req.TopLogprobs < 0 || req.TopLogprobs > 20 {
|
if req.TopLogprobs < 0 || req.TopLogprobs > 20 {
|
||||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "top_logprobs must be between 0 and 20"})
|
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "top_logprobs must be between 0 and 20"})
|
||||||
return
|
return
|
||||||
@@ -1125,7 +1102,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For image generation models, populate details from imagegen package
|
// For image generation models, populate details from imagegen package
|
||||||
if slices.Contains(m.Capabilities(), model.CapabilityImageGeneration) {
|
if slices.Contains(m.Capabilities(), model.CapabilityImage) {
|
||||||
if info, err := imagegen.GetModelInfo(name.String()); err == nil {
|
if info, err := imagegen.GetModelInfo(name.String()); err == nil {
|
||||||
modelDetails.Family = info.Architecture
|
modelDetails.Family = info.Architecture
|
||||||
modelDetails.ParameterSize = format.HumanNumber(uint64(info.ParameterCount))
|
modelDetails.ParameterSize = format.HumanNumber(uint64(info.ParameterCount))
|
||||||
@@ -1133,6 +1110,22 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For safetensors LLM models (experimental), populate details from config.json
|
||||||
|
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
|
||||||
|
if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
|
||||||
|
if arch, ok := info["general.architecture"].(string); ok && arch != "" {
|
||||||
|
modelDetails.Family = arch
|
||||||
|
}
|
||||||
|
if paramCount, ok := info["general.parameter_count"].(int64); ok && paramCount > 0 {
|
||||||
|
modelDetails.ParameterSize = format.HumanNumber(uint64(paramCount))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Get torch_dtype directly from config.json for quantization level
|
||||||
|
if dtype, err := xserver.GetSafetensorsDtype(name.String()); err == nil && dtype != "" {
|
||||||
|
modelDetails.QuantizationLevel = dtype
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if req.System != "" {
|
if req.System != "" {
|
||||||
m.System = req.System
|
m.System = req.System
|
||||||
}
|
}
|
||||||
@@ -1215,7 +1208,27 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
|||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if slices.Contains(m.Capabilities(), model.CapabilityImageGeneration) {
|
if slices.Contains(m.Capabilities(), model.CapabilityImage) {
|
||||||
|
// Populate tensor info if verbose
|
||||||
|
if req.Verbose {
|
||||||
|
if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
|
||||||
|
resp.Tensors = tensors
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// For safetensors LLM models (experimental), populate ModelInfo from config.json
|
||||||
|
if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
|
||||||
|
if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
|
||||||
|
resp.ModelInfo = info
|
||||||
|
}
|
||||||
|
// Populate tensor info if verbose
|
||||||
|
if req.Verbose {
|
||||||
|
if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
|
||||||
|
resp.Tensors = tensors
|
||||||
|
}
|
||||||
|
}
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1587,13 +1600,12 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||||||
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
|
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
|
||||||
r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
|
r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
|
||||||
r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
|
r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
|
||||||
|
// OpenAI-compatible image generation endpoint
|
||||||
|
r.POST("/v1/images/generations", middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
|
||||||
|
|
||||||
// Inference (Anthropic compatibility)
|
// Inference (Anthropic compatibility)
|
||||||
r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
|
r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
|
||||||
|
|
||||||
// Experimental image generation support
|
|
||||||
imagegenapi.RegisterRoutes(r, s)
|
|
||||||
|
|
||||||
if rc != nil {
|
if rc != nil {
|
||||||
// wrap old with new
|
// wrap old with new
|
||||||
rs := ®istry.Local{
|
rs := ®istry.Local{
|
||||||
@@ -2460,3 +2472,91 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
|
|||||||
}
|
}
|
||||||
return msgs
|
return msgs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleImageGenerate handles image generation requests within GenerateHandler.
|
||||||
|
// This is called when the model has the Image capability.
|
||||||
|
func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, modelName string, checkpointStart time.Time) {
|
||||||
|
// Validate image dimensions
|
||||||
|
const maxDimension int32 = 4096
|
||||||
|
if req.Width > maxDimension || req.Height > maxDimension {
|
||||||
|
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("width and height must be <= %d", maxDimension)})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Schedule the runner for image generation
|
||||||
|
runner, _, _, err := s.scheduleRunner(c.Request.Context(), modelName, []model.Capability{model.CapabilityImage}, nil, req.KeepAlive)
|
||||||
|
if err != nil {
|
||||||
|
handleScheduleError(c, req.Model, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
checkpointLoaded := time.Now()
|
||||||
|
|
||||||
|
// Handle load-only request (empty prompt)
|
||||||
|
if req.Prompt == "" {
|
||||||
|
c.JSON(http.StatusOK, api.GenerateResponse{
|
||||||
|
Model: req.Model,
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
Done: true,
|
||||||
|
DoneReason: "load",
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set headers for streaming response
|
||||||
|
c.Header("Content-Type", "application/x-ndjson")
|
||||||
|
|
||||||
|
// Get seed from options if provided
|
||||||
|
var seed int64
|
||||||
|
if s, ok := req.Options["seed"]; ok {
|
||||||
|
switch v := s.(type) {
|
||||||
|
case int:
|
||||||
|
seed = int64(v)
|
||||||
|
case int64:
|
||||||
|
seed = v
|
||||||
|
case float64:
|
||||||
|
seed = int64(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var streamStarted bool
|
||||||
|
if err := runner.Completion(c.Request.Context(), llm.CompletionRequest{
|
||||||
|
Prompt: req.Prompt,
|
||||||
|
Width: req.Width,
|
||||||
|
Height: req.Height,
|
||||||
|
Steps: req.Steps,
|
||||||
|
Seed: seed,
|
||||||
|
}, func(cr llm.CompletionResponse) {
|
||||||
|
streamStarted = true
|
||||||
|
res := api.GenerateResponse{
|
||||||
|
Model: req.Model,
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
Done: cr.Done,
|
||||||
|
}
|
||||||
|
|
||||||
|
if cr.TotalSteps > 0 {
|
||||||
|
res.Completed = int64(cr.Step)
|
||||||
|
res.Total = int64(cr.TotalSteps)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cr.Image != "" {
|
||||||
|
res.Image = cr.Image
|
||||||
|
}
|
||||||
|
|
||||||
|
if cr.Done {
|
||||||
|
res.DoneReason = cr.DoneReason.String()
|
||||||
|
res.Metrics.TotalDuration = time.Since(checkpointStart)
|
||||||
|
res.Metrics.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
|
}
|
||||||
|
|
||||||
|
data, _ := json.Marshal(res)
|
||||||
|
c.Writer.Write(append(data, '\n'))
|
||||||
|
c.Writer.Flush()
|
||||||
|
}); err != nil {
|
||||||
|
// Only send JSON error if streaming hasn't started yet
|
||||||
|
// (once streaming starts, headers are committed and we can't change status code)
|
||||||
|
if !streamStarted {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -574,7 +574,8 @@ func (s *Scheduler) loadImageGen(req *LlmRequest) bool {
|
|||||||
Options: &req.opts,
|
Options: &req.opts,
|
||||||
loading: false,
|
loading: false,
|
||||||
sessionDuration: sessionDuration,
|
sessionDuration: sessionDuration,
|
||||||
refCount: 1,
|
totalSize: server.TotalSize(),
|
||||||
|
vramSize: server.VRAMSize(),
|
||||||
}
|
}
|
||||||
|
|
||||||
s.loadedMu.Lock()
|
s.loadedMu.Lock()
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"slices"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -17,7 +16,6 @@ import (
|
|||||||
"github.com/ollama/ollama/fs/ggml"
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/ml"
|
"github.com/ollama/ollama/ml"
|
||||||
"github.com/ollama/ollama/types/model"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
@@ -807,32 +805,8 @@ func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return n
|
|||||||
func (s *mockLlm) HasExited() bool { return false }
|
func (s *mockLlm) HasExited() bool { return false }
|
||||||
func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
|
func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
|
||||||
|
|
||||||
// TestImageGenCapabilityDetection verifies that models with "image" capability
|
|
||||||
// are correctly identified and routed differently from language models.
|
|
||||||
func TestImageGenCapabilityDetection(t *testing.T) {
|
|
||||||
// Model with image capability should be detected
|
|
||||||
imageModel := &Model{
|
|
||||||
Config: model.ConfigV2{
|
|
||||||
Capabilities: []string{"image"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
require.True(t, slices.Contains(imageModel.Config.Capabilities, "image"))
|
|
||||||
|
|
||||||
// Model without image capability should not be detected
|
|
||||||
langModel := &Model{
|
|
||||||
Config: model.ConfigV2{
|
|
||||||
Capabilities: []string{"completion"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
require.False(t, slices.Contains(langModel.Config.Capabilities, "image"))
|
|
||||||
|
|
||||||
// Empty capabilities should not match
|
|
||||||
emptyModel := &Model{}
|
|
||||||
require.False(t, slices.Contains(emptyModel.Config.Capabilities, "image"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestImageGenRunnerCanBeEvicted verifies that an image generation model
|
// TestImageGenRunnerCanBeEvicted verifies that an image generation model
|
||||||
// loaded in the scheduler can be evicted by a language model request.
|
// loaded in the scheduler can be evicted when idle.
|
||||||
func TestImageGenRunnerCanBeEvicted(t *testing.T) {
|
func TestImageGenRunnerCanBeEvicted(t *testing.T) {
|
||||||
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond)
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond)
|
||||||
defer done()
|
defer done()
|
||||||
@@ -864,3 +838,59 @@ func TestImageGenRunnerCanBeEvicted(t *testing.T) {
|
|||||||
require.NotNil(t, runner)
|
require.NotNil(t, runner)
|
||||||
require.Equal(t, "/fake/image/model", runner.modelPath)
|
require.Equal(t, "/fake/image/model", runner.modelPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestImageGenSchedulerCoexistence verifies that image generation models
|
||||||
|
// can coexist with language models in the scheduler and VRAM is tracked correctly.
|
||||||
|
func TestImageGenSchedulerCoexistence(t *testing.T) {
|
||||||
|
ctx, done := context.WithTimeout(t.Context(), 500*time.Millisecond)
|
||||||
|
defer done()
|
||||||
|
|
||||||
|
s := InitScheduler(ctx)
|
||||||
|
s.getGpuFn = getGpuFn
|
||||||
|
s.getSystemInfoFn = getSystemInfoFn
|
||||||
|
|
||||||
|
// Load both an imagegen runner and a language model runner
|
||||||
|
imageGenRunner := &runnerRef{
|
||||||
|
model: &Model{Name: "flux", ModelPath: "/fake/flux/model"},
|
||||||
|
modelPath: "/fake/flux/model",
|
||||||
|
llama: &mockLlm{vramSize: 8 * format.GigaByte, vramByGPU: map[ml.DeviceID]uint64{{Library: "Metal"}: 8 * format.GigaByte}},
|
||||||
|
sessionDuration: 10 * time.Millisecond,
|
||||||
|
numParallel: 1,
|
||||||
|
refCount: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
langModelRunner := &runnerRef{
|
||||||
|
model: &Model{Name: "llama3", ModelPath: "/fake/llama3/model"},
|
||||||
|
modelPath: "/fake/llama3/model",
|
||||||
|
llama: &mockLlm{vramSize: 4 * format.GigaByte, vramByGPU: map[ml.DeviceID]uint64{{Library: "Metal"}: 4 * format.GigaByte}},
|
||||||
|
sessionDuration: 10 * time.Millisecond,
|
||||||
|
numParallel: 1,
|
||||||
|
refCount: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
s.loadedMu.Lock()
|
||||||
|
s.loaded["/fake/flux/model"] = imageGenRunner
|
||||||
|
s.loaded["/fake/llama3/model"] = langModelRunner
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
|
// Verify both are loaded
|
||||||
|
s.loadedMu.Lock()
|
||||||
|
require.Len(t, s.loaded, 2)
|
||||||
|
require.NotNil(t, s.loaded["/fake/flux/model"])
|
||||||
|
require.NotNil(t, s.loaded["/fake/llama3/model"])
|
||||||
|
s.loadedMu.Unlock()
|
||||||
|
|
||||||
|
// Verify updateFreeSpace accounts for both
|
||||||
|
gpus := []ml.DeviceInfo{
|
||||||
|
{
|
||||||
|
DeviceID: ml.DeviceID{Library: "Metal"},
|
||||||
|
TotalMemory: 24 * format.GigaByte,
|
||||||
|
FreeMemory: 24 * format.GigaByte,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
s.updateFreeSpace(gpus)
|
||||||
|
|
||||||
|
// Free memory should be reduced by both models
|
||||||
|
expectedFree := uint64(24*format.GigaByte) - uint64(8*format.GigaByte) - uint64(4*format.GigaByte)
|
||||||
|
require.Equal(t, expectedFree, gpus[0].FreeMemory)
|
||||||
|
}
|
||||||
|
|||||||
@@ -279,7 +279,7 @@ func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *
|
|||||||
case resp.StatusCode == http.StatusUnauthorized:
|
case resp.StatusCode == http.StatusUnauthorized:
|
||||||
w.Rollback()
|
w.Rollback()
|
||||||
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
|
||||||
token, err := getAuthorizationToken(ctx, challenge)
|
token, err := getAuthorizationToken(ctx, challenge, requestURL.Host)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"cmp"
|
"cmp"
|
||||||
|
"fmt"
|
||||||
"iter"
|
"iter"
|
||||||
|
"log/slog"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -16,7 +18,7 @@ type BytePairEncoding struct {
|
|||||||
regexps []*regexp2.Regexp
|
regexps []*regexp2.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ TextProcessor = (*BytePairEncoding)(nil)
|
var _ Tokenizer = (*BytePairEncoding)(nil)
|
||||||
|
|
||||||
func NewBytePairEncoding(vocab *Vocabulary, pretokenizers ...string) BytePairEncoding {
|
func NewBytePairEncoding(vocab *Vocabulary, pretokenizers ...string) BytePairEncoding {
|
||||||
if len(pretokenizers) == 0 {
|
if len(pretokenizers) == 0 {
|
||||||
@@ -243,6 +245,14 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
|
|||||||
return ids, nil
|
return ids, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type lazyIdsString struct {
|
||||||
|
ids []int32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l lazyIdsString) LogValue() slog.Value {
|
||||||
|
return slog.AnyValue(fmt.Sprint(l.ids))
|
||||||
|
}
|
||||||
|
|
||||||
func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
|
func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
for _, id := range ids {
|
for _, id := range ids {
|
||||||
@@ -267,6 +277,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logutil.Trace("decoded", "string", sb.String(), "from", ids)
|
logutil.Trace("decoded", "string", sb.String(), "from", lazyIdsString{ids: ids})
|
||||||
return sb.String(), nil
|
return sb.String(), nil
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
@@ -17,7 +17,7 @@ import (
|
|||||||
func llama(t testing.TB) BytePairEncoding {
|
func llama(t testing.TB) BytePairEncoding {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
f, err := os.Open(filepath.Join("testdata", "llama3.2", "encoder.json"))
|
f, err := os.Open(filepath.Join("..", "..", "model", "testdata", "llama3.2", "encoder.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -43,7 +43,7 @@ func llama(t testing.TB) BytePairEncoding {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err = os.Open(filepath.Join("testdata", "llama3.2", "vocab.bpe"))
|
f, err = os.Open(filepath.Join("..", "..", "model", "testdata", "llama3.2", "vocab.bpe"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"container/heap"
|
"container/heap"
|
||||||
@@ -17,7 +17,7 @@ type SentencePiece struct {
|
|||||||
vocab *Vocabulary
|
vocab *Vocabulary
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ TextProcessor = (*SentencePiece)(nil)
|
var _ Tokenizer = (*SentencePiece)(nil)
|
||||||
|
|
||||||
func (spm SentencePiece) Vocabulary() *Vocabulary {
|
func (spm SentencePiece) Vocabulary() *Vocabulary {
|
||||||
return spm.vocab
|
return spm.vocab
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -15,7 +15,7 @@ import (
|
|||||||
func loadSentencePieceVocab(t *testing.T) SentencePiece {
|
func loadSentencePieceVocab(t *testing.T) SentencePiece {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
bts, err := os.ReadFile(filepath.Join("testdata", "gemma2", "tokenizer.model"))
|
bts, err := os.ReadFile(filepath.Join("..", "..", "model", "testdata", "gemma2", "tokenizer.model"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
const (
|
const (
|
||||||
TOKEN_TYPE_NORMAL = iota + 1
|
TOKEN_TYPE_NORMAL = iota + 1
|
||||||
@@ -9,7 +9,7 @@ const (
|
|||||||
TOKEN_TYPE_BYTE
|
TOKEN_TYPE_BYTE
|
||||||
)
|
)
|
||||||
|
|
||||||
type TextProcessor interface {
|
type Tokenizer interface {
|
||||||
Encode(s string, addSpecial bool) ([]int32, error)
|
Encode(s string, addSpecial bool) ([]int32, error)
|
||||||
Decode([]int32) (string, error)
|
Decode([]int32) (string, error)
|
||||||
Is(int32, Special) bool
|
Is(int32, Special) bool
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -32,7 +32,7 @@ var wordPieceReplacer = strings.NewReplacer(
|
|||||||
" 're", "'re",
|
" 're", "'re",
|
||||||
)
|
)
|
||||||
|
|
||||||
// Decode implements TextProcessor.
|
// Decode implements Tokenizer.
|
||||||
func (wpm WordPiece) Decode(ids []int32) (string, error) {
|
func (wpm WordPiece) Decode(ids []int32) (string, error) {
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
for i, id := range ids {
|
for i, id := range ids {
|
||||||
@@ -96,7 +96,7 @@ func (wpm WordPiece) words(s string) iter.Seq[string] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode implements TextProcessor.
|
// Encode implements Tokenizer.
|
||||||
func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
|
func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
|
||||||
var ids []int32
|
var ids []int32
|
||||||
|
|
||||||
@@ -151,17 +151,17 @@ func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
|
|||||||
return ids, nil
|
return ids, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is implements TextProcessor.
|
// Is implements Tokenizer.
|
||||||
func (wpm WordPiece) Is(id int32, special Special) bool {
|
func (wpm WordPiece) Is(id int32, special Special) bool {
|
||||||
return wpm.vocab.Is(id, special)
|
return wpm.vocab.Is(id, special)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vocabulary implements TextProcessor.
|
// Vocabulary implements Tokenizer.
|
||||||
func (wpm WordPiece) Vocabulary() *Vocabulary {
|
func (wpm WordPiece) Vocabulary() *Vocabulary {
|
||||||
return wpm.vocab
|
return wpm.vocab
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ TextProcessor = (*WordPiece)(nil)
|
var _ Tokenizer = (*WordPiece)(nil)
|
||||||
|
|
||||||
func NewWordPiece(vocab *Vocabulary, lowercase bool) WordPiece {
|
func NewWordPiece(vocab *Vocabulary, lowercase bool) WordPiece {
|
||||||
return WordPiece{
|
return WordPiece{
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package model
|
package tokenizers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"slices"
|
"slices"
|
||||||
@@ -9,7 +9,7 @@ const (
|
|||||||
CapabilityVision = Capability("vision")
|
CapabilityVision = Capability("vision")
|
||||||
CapabilityEmbedding = Capability("embedding")
|
CapabilityEmbedding = Capability("embedding")
|
||||||
CapabilityThinking = Capability("thinking")
|
CapabilityThinking = Capability("thinking")
|
||||||
CapabilityImageGeneration = Capability("image")
|
CapabilityImage = Capability("image")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (c Capability) String() string {
|
func (c Capability) String() string {
|
||||||
|
|||||||
50
x/README.md
@@ -1,50 +0,0 @@
|
|||||||
# Experimental Features
|
|
||||||
|
|
||||||
## MLX Backend
|
|
||||||
|
|
||||||
We're working on a new experimental backend based on the [MLX project](https://github.com/ml-explore/mlx)
|
|
||||||
|
|
||||||
Support is currently limited to MacOS and Linux with CUDA GPUs. We're looking to add support for Windows CUDA soon, and other GPU vendors.
|
|
||||||
|
|
||||||
### Building ollama-mlx
|
|
||||||
|
|
||||||
The `ollama-mlx` binary is a separate build of Ollama with MLX support enabled. This enables experimental features like image generation.
|
|
||||||
|
|
||||||
#### macOS (Apple Silicon and Intel)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build MLX backend libraries
|
|
||||||
cmake --preset MLX
|
|
||||||
cmake --build --preset MLX --parallel
|
|
||||||
cmake --install build --component MLX
|
|
||||||
|
|
||||||
# Build ollama-mlx binary
|
|
||||||
go build -tags mlx -o ollama-mlx .
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Linux (CUDA)
|
|
||||||
|
|
||||||
On Linux, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with the default Ollama NVIDIA GPU architectures enabled:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build MLX backend libraries with CUDA support
|
|
||||||
cmake --preset 'MLX CUDA 13'
|
|
||||||
cmake --build --preset 'MLX CUDA 13' --parallel
|
|
||||||
cmake --install build --component MLX
|
|
||||||
|
|
||||||
# Build ollama-mlx binary
|
|
||||||
CGO_CFLAGS="-O3 -I$(pwd)/build/_deps/mlx-c-src" \
|
|
||||||
CGO_LDFLAGS="-L$(pwd)/build/lib/ollama -lmlxc -lmlx" \
|
|
||||||
go build -tags mlx -o ollama-mlx .
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Using build scripts
|
|
||||||
|
|
||||||
The build scripts automatically create the `ollama-mlx` binary:
|
|
||||||
|
|
||||||
- **macOS**: `./scripts/build_darwin.sh` produces `dist/darwin/ollama-mlx`
|
|
||||||
- **Linux**: `./scripts/build_linux.sh` produces `ollama-mlx` in the output archives
|
|
||||||
|
|
||||||
## Image Generation
|
|
||||||
|
|
||||||
Image generation is built into the `ollama-mlx` binary. Run `ollama-mlx serve` to start the server with image generation support enabled.
|
|
||||||
67
x/cmd/run.go
@@ -25,14 +25,6 @@ import (
|
|||||||
"github.com/ollama/ollama/x/tools"
|
"github.com/ollama/ollama/x/tools"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MultilineState tracks the state of multiline input
|
|
||||||
type MultilineState int
|
|
||||||
|
|
||||||
const (
|
|
||||||
MultilineNone MultilineState = iota
|
|
||||||
MultilineSystem
|
|
||||||
)
|
|
||||||
|
|
||||||
// Tool output capping constants
|
// Tool output capping constants
|
||||||
const (
|
const (
|
||||||
// localModelTokenLimit is the token limit for local models (smaller context).
|
// localModelTokenLimit is the token limit for local models (smaller context).
|
||||||
@@ -656,7 +648,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
|
|||||||
Prompt: ">>> ",
|
Prompt: ">>> ",
|
||||||
AltPrompt: "... ",
|
AltPrompt: "... ",
|
||||||
Placeholder: "Send a message (/? for help)",
|
Placeholder: "Send a message (/? for help)",
|
||||||
AltPlaceholder: `Use """ to end multi-line input`,
|
AltPlaceholder: "Press Enter to send",
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -707,7 +699,6 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
|
|||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
var format string
|
var format string
|
||||||
var system string
|
var system string
|
||||||
var multiline MultilineState = MultilineNone
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
line, err := scanner.Readline()
|
line, err := scanner.Readline()
|
||||||
@@ -721,37 +712,12 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
|
|||||||
}
|
}
|
||||||
scanner.Prompt.UseAlt = false
|
scanner.Prompt.UseAlt = false
|
||||||
sb.Reset()
|
sb.Reset()
|
||||||
multiline = MultilineNone
|
|
||||||
continue
|
continue
|
||||||
case err != nil:
|
case err != nil:
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case multiline != MultilineNone:
|
|
||||||
// check if there's a multiline terminating string
|
|
||||||
before, ok := strings.CutSuffix(line, `"""`)
|
|
||||||
sb.WriteString(before)
|
|
||||||
if !ok {
|
|
||||||
fmt.Fprintln(&sb)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
switch multiline {
|
|
||||||
case MultilineSystem:
|
|
||||||
system = sb.String()
|
|
||||||
newMessage := api.Message{Role: "system", Content: system}
|
|
||||||
if len(messages) > 0 && messages[len(messages)-1].Role == "system" {
|
|
||||||
messages[len(messages)-1] = newMessage
|
|
||||||
} else {
|
|
||||||
messages = append(messages, newMessage)
|
|
||||||
}
|
|
||||||
fmt.Println("Set system message.")
|
|
||||||
sb.Reset()
|
|
||||||
}
|
|
||||||
|
|
||||||
multiline = MultilineNone
|
|
||||||
scanner.Prompt.UseAlt = false
|
|
||||||
case strings.HasPrefix(line, "/exit"), strings.HasPrefix(line, "/bye"):
|
case strings.HasPrefix(line, "/exit"), strings.HasPrefix(line, "/bye"):
|
||||||
return nil
|
return nil
|
||||||
case strings.HasPrefix(line, "/clear"):
|
case strings.HasPrefix(line, "/clear"):
|
||||||
@@ -860,41 +826,18 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
|
|||||||
options[args[2]] = fp[args[2]]
|
options[args[2]] = fp[args[2]]
|
||||||
case "system":
|
case "system":
|
||||||
if len(args) < 3 {
|
if len(args) < 3 {
|
||||||
fmt.Println("Usage: /set system <message> or /set system \"\"\"<multi-line message>\"\"\"")
|
fmt.Println("Usage: /set system <message>")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
multiline = MultilineSystem
|
system = strings.Join(args[2:], " ")
|
||||||
|
newMessage := api.Message{Role: "system", Content: system}
|
||||||
line := strings.Join(args[2:], " ")
|
|
||||||
line, ok := strings.CutPrefix(line, `"""`)
|
|
||||||
if !ok {
|
|
||||||
multiline = MultilineNone
|
|
||||||
} else {
|
|
||||||
// only cut suffix if the line is multiline
|
|
||||||
line, ok = strings.CutSuffix(line, `"""`)
|
|
||||||
if ok {
|
|
||||||
multiline = MultilineNone
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sb.WriteString(line)
|
|
||||||
if multiline != MultilineNone {
|
|
||||||
scanner.Prompt.UseAlt = true
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
system = sb.String()
|
|
||||||
newMessage := api.Message{Role: "system", Content: sb.String()}
|
|
||||||
// Check if the slice is not empty and the last message is from 'system'
|
|
||||||
if len(messages) > 0 && messages[len(messages)-1].Role == "system" {
|
if len(messages) > 0 && messages[len(messages)-1].Role == "system" {
|
||||||
// Replace the last message
|
|
||||||
messages[len(messages)-1] = newMessage
|
messages[len(messages)-1] = newMessage
|
||||||
} else {
|
} else {
|
||||||
messages = append(messages, newMessage)
|
messages = append(messages, newMessage)
|
||||||
}
|
}
|
||||||
fmt.Println("Set system message.")
|
fmt.Println("Set system message.")
|
||||||
sb.Reset()
|
|
||||||
continue
|
continue
|
||||||
default:
|
default:
|
||||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||||
@@ -1081,7 +1024,7 @@ func GenerateInteractive(cmd *cobra.Command, modelName string, wordWrap bool, op
|
|||||||
sb.WriteString(line)
|
sb.WriteString(line)
|
||||||
}
|
}
|
||||||
|
|
||||||
if sb.Len() > 0 && multiline == MultilineNone {
|
if sb.Len() > 0 {
|
||||||
newMessage := api.Message{Role: "user", Content: sb.String()}
|
newMessage := api.Message{Role: "user", Content: sb.String()}
|
||||||
messages = append(messages, newMessage)
|
messages = append(messages, newMessage)
|
||||||
|
|
||||||
|
|||||||
282
x/create/client/create.go
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
// Package client provides client-side model creation for safetensors-based models.
|
||||||
|
//
|
||||||
|
// This package is in x/ because the safetensors model storage format is under development.
|
||||||
|
// It also exists to break an import cycle: server imports x/create, so x/create
|
||||||
|
// cannot import server. This sub-package can import server because server doesn't
|
||||||
|
// import it.
|
||||||
|
package client
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/progress"
|
||||||
|
"github.com/ollama/ollama/server"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
|
"github.com/ollama/ollama/x/create"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MinOllamaVersion is the minimum Ollama version required for safetensors models.
|
||||||
|
const MinOllamaVersion = "0.14.0"
|
||||||
|
|
||||||
|
// ModelfileConfig holds configuration extracted from a Modelfile.
|
||||||
|
type ModelfileConfig struct {
|
||||||
|
Template string
|
||||||
|
System string
|
||||||
|
License string
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateOptions holds all options for model creation.
|
||||||
|
type CreateOptions struct {
|
||||||
|
ModelName string
|
||||||
|
ModelDir string
|
||||||
|
Quantize string // "fp8" for quantization
|
||||||
|
Modelfile *ModelfileConfig // template/system/license from Modelfile
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateModel imports a model from a local directory.
|
||||||
|
// This creates blobs and manifest directly on disk, bypassing the HTTP API.
|
||||||
|
// Automatically detects model type (safetensors LLM vs image gen) and routes accordingly.
|
||||||
|
func CreateModel(opts CreateOptions, p *progress.Progress) error {
|
||||||
|
// Detect model type
|
||||||
|
isSafetensors := create.IsSafetensorsModelDir(opts.ModelDir)
|
||||||
|
isImageGen := create.IsTensorModelDir(opts.ModelDir)
|
||||||
|
|
||||||
|
if !isSafetensors && !isImageGen {
|
||||||
|
return fmt.Errorf("%s is not a supported model directory (needs config.json + *.safetensors or model_index.json)", opts.ModelDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine model type settings
|
||||||
|
var modelType, spinnerKey string
|
||||||
|
var capabilities []string
|
||||||
|
if isSafetensors {
|
||||||
|
modelType = "safetensors model"
|
||||||
|
spinnerKey = "create"
|
||||||
|
capabilities = []string{"completion"}
|
||||||
|
} else {
|
||||||
|
modelType = "image generation model"
|
||||||
|
spinnerKey = "imagegen"
|
||||||
|
capabilities = []string{"image"}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up progress spinner
|
||||||
|
statusMsg := "importing " + modelType
|
||||||
|
spinner := progress.NewSpinner(statusMsg)
|
||||||
|
p.Add(spinnerKey, spinner)
|
||||||
|
|
||||||
|
progressFn := func(msg string) {
|
||||||
|
spinner.Stop()
|
||||||
|
statusMsg = msg
|
||||||
|
spinner = progress.NewSpinner(statusMsg)
|
||||||
|
p.Add(spinnerKey, spinner)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the model using shared callbacks
|
||||||
|
var err error
|
||||||
|
if isSafetensors {
|
||||||
|
err = create.CreateSafetensorsModel(
|
||||||
|
opts.ModelName, opts.ModelDir, opts.Quantize,
|
||||||
|
newLayerCreator(), newTensorLayerCreator(),
|
||||||
|
newManifestWriter(opts, capabilities),
|
||||||
|
progressFn,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
err = create.CreateImageGenModel(
|
||||||
|
opts.ModelName, opts.ModelDir, opts.Quantize,
|
||||||
|
newLayerCreator(), newTensorLayerCreator(),
|
||||||
|
newManifestWriter(opts, capabilities),
|
||||||
|
progressFn,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
spinner.Stop()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Created %s '%s'\n", modelType, opts.ModelName)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newLayerCreator returns a LayerCreator callback for creating config/JSON layers.
|
||||||
|
func newLayerCreator() create.LayerCreator {
|
||||||
|
return func(r io.Reader, mediaType, name string) (create.LayerInfo, error) {
|
||||||
|
layer, err := server.NewLayer(r, mediaType)
|
||||||
|
if err != nil {
|
||||||
|
return create.LayerInfo{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return create.LayerInfo{
|
||||||
|
Digest: layer.Digest,
|
||||||
|
Size: layer.Size,
|
||||||
|
MediaType: layer.MediaType,
|
||||||
|
Name: name,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// newTensorLayerCreator returns a QuantizingTensorLayerCreator callback for creating tensor layers.
|
||||||
|
// When quantize is non-empty, returns multiple layers (weight + scales + optional qbias).
|
||||||
|
func newTensorLayerCreator() create.QuantizingTensorLayerCreator {
|
||||||
|
return func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]create.LayerInfo, error) {
|
||||||
|
if quantize != "" {
|
||||||
|
return createQuantizedLayers(r, name, dtype, shape, quantize)
|
||||||
|
}
|
||||||
|
return createUnquantizedLayer(r, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createQuantizedLayers quantizes a tensor and returns the resulting layers.
|
||||||
|
func createQuantizedLayers(r io.Reader, name, dtype string, shape []int32, quantize string) ([]create.LayerInfo, error) {
|
||||||
|
if !QuantizeSupported() {
|
||||||
|
return nil, fmt.Errorf("quantization requires MLX support")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantize the tensor
|
||||||
|
qweightData, scalesData, qbiasData, _, _, _, err := quantizeTensor(r, name, dtype, shape, quantize)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to quantize %s: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create layer for quantized weight
|
||||||
|
weightLayer, err := server.NewLayer(bytes.NewReader(qweightData), server.MediaTypeImageTensor)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create layer for scales
|
||||||
|
scalesLayer, err := server.NewLayer(bytes.NewReader(scalesData), server.MediaTypeImageTensor)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
layers := []create.LayerInfo{
|
||||||
|
{
|
||||||
|
Digest: weightLayer.Digest,
|
||||||
|
Size: weightLayer.Size,
|
||||||
|
MediaType: weightLayer.MediaType,
|
||||||
|
Name: name,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Digest: scalesLayer.Digest,
|
||||||
|
Size: scalesLayer.Size,
|
||||||
|
MediaType: scalesLayer.MediaType,
|
||||||
|
Name: name + "_scale",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add qbiases layer if present (affine mode)
|
||||||
|
if qbiasData != nil {
|
||||||
|
qbiasLayer, err := server.NewLayer(bytes.NewReader(qbiasData), server.MediaTypeImageTensor)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
layers = append(layers, create.LayerInfo{
|
||||||
|
Digest: qbiasLayer.Digest,
|
||||||
|
Size: qbiasLayer.Size,
|
||||||
|
MediaType: qbiasLayer.MediaType,
|
||||||
|
Name: name + "_qbias",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return layers, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// createUnquantizedLayer creates a single tensor layer without quantization.
|
||||||
|
func createUnquantizedLayer(r io.Reader, name string) ([]create.LayerInfo, error) {
|
||||||
|
layer, err := server.NewLayer(r, server.MediaTypeImageTensor)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return []create.LayerInfo{
|
||||||
|
{
|
||||||
|
Digest: layer.Digest,
|
||||||
|
Size: layer.Size,
|
||||||
|
MediaType: layer.MediaType,
|
||||||
|
Name: name,
|
||||||
|
},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newManifestWriter returns a ManifestWriter callback for writing the model manifest.
|
||||||
|
func newManifestWriter(opts CreateOptions, capabilities []string) create.ManifestWriter {
|
||||||
|
return func(modelName string, config create.LayerInfo, layers []create.LayerInfo) error {
|
||||||
|
name := model.ParseName(modelName)
|
||||||
|
if !name.IsValid() {
|
||||||
|
return fmt.Errorf("invalid model name: %s", modelName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create config blob with version requirement
|
||||||
|
configData := model.ConfigV2{
|
||||||
|
ModelFormat: "safetensors",
|
||||||
|
Capabilities: capabilities,
|
||||||
|
Requires: MinOllamaVersion,
|
||||||
|
}
|
||||||
|
configJSON, err := json.Marshal(configData)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create config layer blob
|
||||||
|
configLayer, err := server.NewLayer(bytes.NewReader(configJSON), "application/vnd.docker.container.image.v1+json")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create config layer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert LayerInfo to server.Layer
|
||||||
|
serverLayers := make([]server.Layer, 0, len(layers))
|
||||||
|
for _, l := range layers {
|
||||||
|
serverLayers = append(serverLayers, server.Layer{
|
||||||
|
MediaType: l.MediaType,
|
||||||
|
Digest: l.Digest,
|
||||||
|
Size: l.Size,
|
||||||
|
Name: l.Name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add Modelfile layers if present
|
||||||
|
if opts.Modelfile != nil {
|
||||||
|
modelfileLayers, err := createModelfileLayers(opts.Modelfile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
serverLayers = append(serverLayers, modelfileLayers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return server.WriteManifest(name, configLayer, serverLayers)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createModelfileLayers creates layers for template, system, and license from Modelfile config.
|
||||||
|
func createModelfileLayers(mf *ModelfileConfig) ([]server.Layer, error) {
|
||||||
|
var layers []server.Layer
|
||||||
|
|
||||||
|
if mf.Template != "" {
|
||||||
|
layer, err := server.NewLayer(bytes.NewReader([]byte(mf.Template)), "application/vnd.ollama.image.template")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create template layer: %w", err)
|
||||||
|
}
|
||||||
|
layers = append(layers, layer)
|
||||||
|
}
|
||||||
|
|
||||||
|
if mf.System != "" {
|
||||||
|
layer, err := server.NewLayer(bytes.NewReader([]byte(mf.System)), "application/vnd.ollama.image.system")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create system layer: %w", err)
|
||||||
|
}
|
||||||
|
layers = append(layers, layer)
|
||||||
|
}
|
||||||
|
|
||||||
|
if mf.License != "" {
|
||||||
|
layer, err := server.NewLayer(bytes.NewReader([]byte(mf.License)), "application/vnd.ollama.image.license")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create license layer: %w", err)
|
||||||
|
}
|
||||||
|
layers = append(layers, layer)
|
||||||
|
}
|
||||||
|
|
||||||
|
return layers, nil
|
||||||
|
}
|
||||||
146
x/create/client/create_test.go
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
package client
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestModelfileConfig(t *testing.T) {
|
||||||
|
// Test that ModelfileConfig struct works as expected
|
||||||
|
config := &ModelfileConfig{
|
||||||
|
Template: "{{ .Prompt }}",
|
||||||
|
System: "You are a helpful assistant.",
|
||||||
|
License: "MIT",
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Template != "{{ .Prompt }}" {
|
||||||
|
t.Errorf("Template = %q, want %q", config.Template, "{{ .Prompt }}")
|
||||||
|
}
|
||||||
|
if config.System != "You are a helpful assistant." {
|
||||||
|
t.Errorf("System = %q, want %q", config.System, "You are a helpful assistant.")
|
||||||
|
}
|
||||||
|
if config.License != "MIT" {
|
||||||
|
t.Errorf("License = %q, want %q", config.License, "MIT")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelfileConfig_Empty(t *testing.T) {
|
||||||
|
config := &ModelfileConfig{}
|
||||||
|
|
||||||
|
if config.Template != "" {
|
||||||
|
t.Errorf("Template should be empty, got %q", config.Template)
|
||||||
|
}
|
||||||
|
if config.System != "" {
|
||||||
|
t.Errorf("System should be empty, got %q", config.System)
|
||||||
|
}
|
||||||
|
if config.License != "" {
|
||||||
|
t.Errorf("License should be empty, got %q", config.License)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelfileConfig_PartialFields(t *testing.T) {
|
||||||
|
// Test config with only some fields set
|
||||||
|
config := &ModelfileConfig{
|
||||||
|
Template: "{{ .Prompt }}",
|
||||||
|
// System and License intentionally empty
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Template == "" {
|
||||||
|
t.Error("Template should not be empty")
|
||||||
|
}
|
||||||
|
if config.System != "" {
|
||||||
|
t.Error("System should be empty")
|
||||||
|
}
|
||||||
|
if config.License != "" {
|
||||||
|
t.Error("License should be empty")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMinOllamaVersion(t *testing.T) {
|
||||||
|
// Verify the minimum version constant is set
|
||||||
|
if MinOllamaVersion == "" {
|
||||||
|
t.Error("MinOllamaVersion should not be empty")
|
||||||
|
}
|
||||||
|
if MinOllamaVersion != "0.14.0" {
|
||||||
|
t.Errorf("MinOllamaVersion = %q, want %q", MinOllamaVersion, "0.14.0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateModel_InvalidDir(t *testing.T) {
|
||||||
|
// Test that CreateModel returns error for invalid directory
|
||||||
|
err := CreateModel(CreateOptions{
|
||||||
|
ModelName: "test-model",
|
||||||
|
ModelDir: "/nonexistent/path",
|
||||||
|
}, nil)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for nonexistent directory, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateModel_NotSafetensorsDir(t *testing.T) {
|
||||||
|
// Test that CreateModel returns error for directory without safetensors
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
err := CreateModel(CreateOptions{
|
||||||
|
ModelName: "test-model",
|
||||||
|
ModelDir: dir,
|
||||||
|
}, nil)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for empty directory, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateOptions(t *testing.T) {
|
||||||
|
opts := CreateOptions{
|
||||||
|
ModelName: "my-model",
|
||||||
|
ModelDir: "/path/to/model",
|
||||||
|
Quantize: "fp8",
|
||||||
|
Modelfile: &ModelfileConfig{
|
||||||
|
Template: "test",
|
||||||
|
System: "system",
|
||||||
|
License: "MIT",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.ModelName != "my-model" {
|
||||||
|
t.Errorf("ModelName = %q, want %q", opts.ModelName, "my-model")
|
||||||
|
}
|
||||||
|
if opts.ModelDir != "/path/to/model" {
|
||||||
|
t.Errorf("ModelDir = %q, want %q", opts.ModelDir, "/path/to/model")
|
||||||
|
}
|
||||||
|
if opts.Quantize != "fp8" {
|
||||||
|
t.Errorf("Quantize = %q, want %q", opts.Quantize, "fp8")
|
||||||
|
}
|
||||||
|
if opts.Modelfile == nil {
|
||||||
|
t.Error("Modelfile should not be nil")
|
||||||
|
}
|
||||||
|
if opts.Modelfile.Template != "test" {
|
||||||
|
t.Errorf("Modelfile.Template = %q, want %q", opts.Modelfile.Template, "test")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateOptions_Defaults(t *testing.T) {
|
||||||
|
opts := CreateOptions{
|
||||||
|
ModelName: "test",
|
||||||
|
ModelDir: "/tmp",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantize should default to empty
|
||||||
|
if opts.Quantize != "" {
|
||||||
|
t.Errorf("Quantize should be empty by default, got %q", opts.Quantize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Modelfile should default to nil
|
||||||
|
if opts.Modelfile != nil {
|
||||||
|
t.Error("Modelfile should be nil by default")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQuantizeSupported(t *testing.T) {
|
||||||
|
// This just verifies the function exists and returns a boolean
|
||||||
|
// The actual value depends on build tags (mlx vs non-mlx)
|
||||||
|
supported := QuantizeSupported()
|
||||||
|
|
||||||
|
// In non-mlx builds, this should be false
|
||||||
|
// We can't easily test both cases, so just verify it returns something
|
||||||
|
_ = supported
|
||||||
|
}
|
||||||
@@ -11,10 +11,11 @@ import (
|
|||||||
"github.com/ollama/ollama/x/imagegen/mlx"
|
"github.com/ollama/ollama/x/imagegen/mlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
// quantizeTensor loads a tensor from safetensors format, quantizes it to affine int8,
|
// quantizeTensor loads a tensor from safetensors format, quantizes it,
|
||||||
// and returns safetensors data for the quantized weights, scales, and biases.
|
// and returns safetensors data for the quantized weights, scales, and biases.
|
||||||
|
// Supported quantization types: "fp8" (affine 8-bit)
|
||||||
// Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
|
// Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
|
||||||
func quantizeTensor(r io.Reader, name, dtype string, shape []int32) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
|
func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
|
||||||
tmpDir := ensureTempDir()
|
tmpDir := ensureTempDir()
|
||||||
|
|
||||||
// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)
|
// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)
|
||||||
@@ -50,9 +51,18 @@ func quantizeTensor(r io.Reader, name, dtype string, shape []int32) (qweightData
|
|||||||
mlx.Eval(arr)
|
mlx.Eval(arr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quantize with affine mode: group_size=32, bits=8
|
// Quantize based on quantization type
|
||||||
// Note: mxfp8 mode doesn't have matmul kernels in MLX, affine mode does
|
var qweight, scales, qbiases *mlx.Array
|
||||||
qweight, scales, qbiases := mlx.Quantize(arr, 32, 8, "affine")
|
switch quantize {
|
||||||
|
case "fp4":
|
||||||
|
// affine mode: group_size=32, bits=4
|
||||||
|
qweight, scales, qbiases = mlx.Quantize(arr, 32, 4, "affine")
|
||||||
|
case "fp8":
|
||||||
|
// affine mode: group_size=32, bits=8
|
||||||
|
qweight, scales, qbiases = mlx.Quantize(arr, 32, 8, "affine")
|
||||||
|
default:
|
||||||
|
return nil, nil, nil, nil, nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
|
||||||
|
}
|
||||||
|
|
||||||
// Eval and make contiguous for data access
|
// Eval and make contiguous for data access
|
||||||
qweight = mlx.Contiguous(qweight)
|
qweight = mlx.Contiguous(qweight)
|
||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// quantizeTensor is not available without MLX
|
// quantizeTensor is not available without MLX
|
||||||
func quantizeTensor(r io.Reader, name, dtype string, shape []int32) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
|
func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
|
||||||
return nil, nil, nil, nil, nil, nil, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
|
return nil, nil, nil, nil, nil, nil, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
|
||||||
}
|
}
|
||||||
|
|
||||||
399
x/create/create.go
Normal file
@@ -0,0 +1,399 @@
|
|||||||
|
package create
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/envconfig"
|
||||||
|
"github.com/ollama/ollama/x/imagegen/safetensors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ModelConfig represents the config blob stored with a model.
|
||||||
|
type ModelConfig struct {
|
||||||
|
ModelFormat string `json:"model_format"`
|
||||||
|
Capabilities []string `json:"capabilities"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manifest represents the manifest JSON structure.
|
||||||
|
type Manifest struct {
|
||||||
|
SchemaVersion int `json:"schemaVersion"`
|
||||||
|
MediaType string `json:"mediaType"`
|
||||||
|
Config ManifestLayer `json:"config"`
|
||||||
|
Layers []ManifestLayer `json:"layers"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ManifestLayer represents a layer in the manifest.
|
||||||
|
type ManifestLayer struct {
|
||||||
|
MediaType string `json:"mediaType"`
|
||||||
|
Digest string `json:"digest"`
|
||||||
|
Size int64 `json:"size"`
|
||||||
|
Name string `json:"name,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultManifestDir returns the manifest storage directory.
|
||||||
|
func defaultManifestDir() string {
|
||||||
|
return filepath.Join(envconfig.Models(), "manifests")
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultBlobDir returns the blob storage directory.
|
||||||
|
func defaultBlobDir() string {
|
||||||
|
return filepath.Join(envconfig.Models(), "blobs")
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveManifestPath converts a model name to a manifest file path.
|
||||||
|
func resolveManifestPath(modelName string) string {
|
||||||
|
host := "registry.ollama.ai"
|
||||||
|
namespace := "library"
|
||||||
|
name := modelName
|
||||||
|
tag := "latest"
|
||||||
|
|
||||||
|
if idx := strings.LastIndex(name, ":"); idx != -1 {
|
||||||
|
tag = name[idx+1:]
|
||||||
|
name = name[:idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
parts := strings.Split(name, "/")
|
||||||
|
switch len(parts) {
|
||||||
|
case 3:
|
||||||
|
host = parts[0]
|
||||||
|
namespace = parts[1]
|
||||||
|
name = parts[2]
|
||||||
|
case 2:
|
||||||
|
namespace = parts[0]
|
||||||
|
name = parts[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(defaultManifestDir(), host, namespace, name, tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadManifest loads a manifest for the given model name.
|
||||||
|
func loadManifest(modelName string) (*Manifest, error) {
|
||||||
|
manifestPath := resolveManifestPath(modelName)
|
||||||
|
|
||||||
|
data, err := os.ReadFile(manifestPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var manifest Manifest
|
||||||
|
if err := json.Unmarshal(data, &manifest); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &manifest, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadModelConfig loads the config blob for a model.
|
||||||
|
func loadModelConfig(modelName string) (*ModelConfig, error) {
|
||||||
|
manifest, err := loadManifest(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the config blob
|
||||||
|
blobName := strings.Replace(manifest.Config.Digest, ":", "-", 1)
|
||||||
|
blobPath := filepath.Join(defaultBlobDir(), blobName)
|
||||||
|
|
||||||
|
data, err := os.ReadFile(blobPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var config ModelConfig
|
||||||
|
if err := json.Unmarshal(data, &config); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &config, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsSafetensorsModel checks if a model was created with the experimental
|
||||||
|
// safetensors builder by checking the model format in the config.
|
||||||
|
func IsSafetensorsModel(modelName string) bool {
|
||||||
|
config, err := loadModelConfig(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return config.ModelFormat == "safetensors"
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsSafetensorsLLMModel checks if a model is a safetensors LLM model
|
||||||
|
// (has completion capability, not image generation).
|
||||||
|
func IsSafetensorsLLMModel(modelName string) bool {
|
||||||
|
config, err := loadModelConfig(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return config.ModelFormat == "safetensors" && slices.Contains(config.Capabilities, "completion")
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsImageGenModel checks if a model is an image generation model
|
||||||
|
// (has image capability).
|
||||||
|
func IsImageGenModel(modelName string) bool {
|
||||||
|
config, err := loadModelConfig(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return config.ModelFormat == "safetensors" && slices.Contains(config.Capabilities, "image")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetModelArchitecture returns the architecture from the model's config.json layer.
|
||||||
|
func GetModelArchitecture(modelName string) (string, error) {
|
||||||
|
manifest, err := loadManifest(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the config.json layer
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
if layer.Name == "config.json" && layer.MediaType == "application/vnd.ollama.image.json" {
|
||||||
|
blobName := strings.Replace(layer.Digest, ":", "-", 1)
|
||||||
|
blobPath := filepath.Join(defaultBlobDir(), blobName)
|
||||||
|
|
||||||
|
data, err := os.ReadFile(blobPath)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
var cfg struct {
|
||||||
|
Architectures []string `json:"architectures"`
|
||||||
|
ModelType string `json:"model_type"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prefer model_type, fall back to first architecture
|
||||||
|
if cfg.ModelType != "" {
|
||||||
|
return cfg.ModelType, nil
|
||||||
|
}
|
||||||
|
if len(cfg.Architectures) > 0 {
|
||||||
|
return cfg.Architectures[0], nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("architecture not found in model config")
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsTensorModelDir checks if the directory contains a diffusers-style tensor model
|
||||||
|
// by looking for model_index.json, which is the standard diffusers pipeline config.
|
||||||
|
func IsTensorModelDir(dir string) bool {
|
||||||
|
_, err := os.Stat(filepath.Join(dir, "model_index.json"))
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsSafetensorsModelDir checks if the directory contains a standard safetensors model
|
||||||
|
// by looking for config.json and at least one .safetensors file.
|
||||||
|
func IsSafetensorsModelDir(dir string) bool {
|
||||||
|
// Must have config.json
|
||||||
|
if _, err := os.Stat(filepath.Join(dir, "config.json")); err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must have at least one .safetensors file
|
||||||
|
entries, err := os.ReadDir(dir)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
if strings.HasSuffix(entry.Name(), ".safetensors") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// LayerInfo holds metadata for a created layer.
|
||||||
|
type LayerInfo struct {
|
||||||
|
Digest string
|
||||||
|
Size int64
|
||||||
|
MediaType string
|
||||||
|
Name string // Path-style name: "component/tensor" or "path/to/config.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
// LayerCreator is called to create a blob layer.
|
||||||
|
// name is the path-style name (e.g., "tokenizer/tokenizer.json")
|
||||||
|
type LayerCreator func(r io.Reader, mediaType, name string) (LayerInfo, error)
|
||||||
|
|
||||||
|
// TensorLayerCreator creates a tensor blob layer with metadata.
|
||||||
|
// name is the path-style name including component (e.g., "text_encoder/model.embed_tokens.weight")
|
||||||
|
type TensorLayerCreator func(r io.Reader, name, dtype string, shape []int32) (LayerInfo, error)
|
||||||
|
|
||||||
|
// QuantizingTensorLayerCreator creates tensor layers with optional quantization.
|
||||||
|
// When quantize is non-empty (e.g., "fp8"), returns multiple layers (weight + scales + biases).
|
||||||
|
type QuantizingTensorLayerCreator func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error)
|
||||||
|
|
||||||
|
// ManifestWriter writes the manifest file.
|
||||||
|
type ManifestWriter func(modelName string, config LayerInfo, layers []LayerInfo) error
|
||||||
|
|
||||||
|
// ShouldQuantize returns true if a tensor should be quantized.
|
||||||
|
// For image gen models (component non-empty): quantizes linear weights, skipping VAE, embeddings, norms.
|
||||||
|
// For LLM models (component empty): quantizes linear weights, skipping embeddings, norms, and small tensors.
|
||||||
|
func ShouldQuantize(name, component string) bool {
|
||||||
|
// Image gen specific: skip VAE entirely
|
||||||
|
if component == "vae" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip embeddings
|
||||||
|
if strings.Contains(name, "embed") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip layer norms and RMS norms
|
||||||
|
if strings.Contains(name, "norm") || strings.Contains(name, "ln_") || strings.Contains(name, "layernorm") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip biases
|
||||||
|
if strings.HasSuffix(name, ".bias") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only quantize weights
|
||||||
|
return strings.HasSuffix(name, ".weight")
|
||||||
|
}
|
||||||
|
|
||||||
|
// ShouldQuantizeTensor returns true if a tensor should be quantized based on name and shape.
|
||||||
|
// This is a more detailed check that also considers tensor dimensions.
|
||||||
|
func ShouldQuantizeTensor(name string, shape []int32) bool {
|
||||||
|
// Use basic name-based check first
|
||||||
|
if !ShouldQuantize(name, "") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only quantize 2D tensors (linear layers) - skip 1D (biases, norms) and higher-D (convolutions if any)
|
||||||
|
if len(shape) != 2 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip small tensors (less than 1024 elements) - not worth quantizing
|
||||||
|
if len(shape) >= 2 && int64(shape[0])*int64(shape[1]) < 1024 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// MLX quantization requires last dimension to be divisible by group size (32)
|
||||||
|
if shape[len(shape)-1]%32 != 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateSafetensorsModel imports a standard safetensors model from a directory.
|
||||||
|
// This handles Hugging Face style models with config.json and *.safetensors files.
|
||||||
|
// Stores each tensor as a separate blob for fine-grained deduplication.
|
||||||
|
// If quantize is non-empty (e.g., "fp8"), eligible tensors will be quantized.
|
||||||
|
func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer LayerCreator, createTensorLayer QuantizingTensorLayerCreator, writeManifest ManifestWriter, fn func(status string)) error {
|
||||||
|
var layers []LayerInfo
|
||||||
|
var configLayer LayerInfo
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(modelDir)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read directory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process all safetensors files
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".safetensors") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stPath := filepath.Join(modelDir, entry.Name())
|
||||||
|
|
||||||
|
// Extract individual tensors from safetensors file
|
||||||
|
extractor, err := safetensors.OpenForExtraction(stPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open %s: %w", stPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tensorNames := extractor.ListTensors()
|
||||||
|
quantizeMsg := ""
|
||||||
|
if quantize != "" {
|
||||||
|
quantizeMsg = fmt.Sprintf(", quantizing to %s", quantize)
|
||||||
|
}
|
||||||
|
fn(fmt.Sprintf("importing %s (%d tensors%s)", entry.Name(), len(tensorNames), quantizeMsg))
|
||||||
|
|
||||||
|
for _, tensorName := range tensorNames {
|
||||||
|
td, err := extractor.GetTensor(tensorName)
|
||||||
|
if err != nil {
|
||||||
|
extractor.Close()
|
||||||
|
return fmt.Errorf("failed to get tensor %s: %w", tensorName, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine quantization type for this tensor (empty string if not quantizing)
|
||||||
|
quantizeType := ""
|
||||||
|
if quantize != "" && ShouldQuantizeTensor(tensorName, td.Shape) {
|
||||||
|
quantizeType = quantize
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store as minimal safetensors format (88 bytes header overhead)
|
||||||
|
// This enables native mmap loading via mlx_load_safetensors
|
||||||
|
// createTensorLayer returns multiple layers if quantizing (weight + scales)
|
||||||
|
newLayers, err := createTensorLayer(td.SafetensorsReader(), tensorName, td.Dtype, td.Shape, quantizeType)
|
||||||
|
if err != nil {
|
||||||
|
extractor.Close()
|
||||||
|
return fmt.Errorf("failed to create layer for %s: %w", tensorName, err)
|
||||||
|
}
|
||||||
|
layers = append(layers, newLayers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
extractor.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process all JSON config files
|
||||||
|
for _, entry := range entries {
|
||||||
|
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip the index file as we don't need it after extraction
|
||||||
|
if entry.Name() == "model.safetensors.index.json" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cfgPath := entry.Name()
|
||||||
|
fullPath := filepath.Join(modelDir, cfgPath)
|
||||||
|
|
||||||
|
fn(fmt.Sprintf("importing config %s", cfgPath))
|
||||||
|
|
||||||
|
f, err := os.Open(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open %s: %w", cfgPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
layer, err := createLayer(f, "application/vnd.ollama.image.json", cfgPath)
|
||||||
|
f.Close()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create layer for %s: %w", cfgPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use config.json as the config layer
|
||||||
|
if cfgPath == "config.json" {
|
||||||
|
configLayer = layer
|
||||||
|
}
|
||||||
|
|
||||||
|
layers = append(layers, layer)
|
||||||
|
}
|
||||||
|
|
||||||
|
if configLayer.Digest == "" {
|
||||||
|
return fmt.Errorf("config.json not found in %s", modelDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn(fmt.Sprintf("writing manifest for %s", modelName))
|
||||||
|
|
||||||
|
if err := writeManifest(modelName, configLayer, layers); err != nil {
|
||||||
|
return fmt.Errorf("failed to write manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn(fmt.Sprintf("successfully imported %s with %d layers", modelName, len(layers)))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
752
x/create/create_test.go
Normal file
@@ -0,0 +1,752 @@
|
|||||||
|
package create
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIsTensorModelDir(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(dir string) error
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid diffusers model with model_index.json",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return os.WriteFile(filepath.Join(dir, "model_index.json"), []byte(`{"_class_name": "FluxPipeline"}`), 0o644)
|
||||||
|
},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty directory",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "directory with other files but no model_index.json",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{}`), 0o644)
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
if err := tt.setup(dir); err != nil {
|
||||||
|
t.Fatalf("setup failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got := IsTensorModelDir(dir)
|
||||||
|
if got != tt.expected {
|
||||||
|
t.Errorf("IsTensorModelDir() = %v, want %v", got, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsSafetensorsModelDir(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(dir string) error
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid safetensors model with config.json and .safetensors file",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{"model_type": "gemma3"}`), 0o644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(filepath.Join(dir, "model.safetensors"), []byte("dummy"), 0o644)
|
||||||
|
},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "config.json only, no safetensors files",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{}`), 0o644)
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "safetensors file only, no config.json",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return os.WriteFile(filepath.Join(dir, "model.safetensors"), []byte("dummy"), 0o644)
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty directory",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple safetensors files with config.json",
|
||||||
|
setup: func(dir string) error {
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{}`), 0o644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "model-00001-of-00002.safetensors"), []byte("dummy"), 0o644); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(filepath.Join(dir, "model-00002-of-00002.safetensors"), []byte("dummy"), 0o644)
|
||||||
|
},
|
||||||
|
expected: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
if err := tt.setup(dir); err != nil {
|
||||||
|
t.Fatalf("setup failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got := IsSafetensorsModelDir(dir)
|
||||||
|
if got != tt.expected {
|
||||||
|
t.Errorf("IsSafetensorsModelDir() = %v, want %v", got, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsSafetensorsModelDir_NonexistentDir(t *testing.T) {
|
||||||
|
got := IsSafetensorsModelDir("/nonexistent/path/that/does/not/exist")
|
||||||
|
if got != false {
|
||||||
|
t.Errorf("IsSafetensorsModelDir() = %v for nonexistent dir, want false", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createMinimalSafetensors creates a minimal valid safetensors file with one tensor
|
||||||
|
func createMinimalSafetensors(t *testing.T, path string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
// Create a minimal safetensors file with a single float32 tensor
|
||||||
|
header := map[string]interface{}{
|
||||||
|
"test_tensor": map[string]interface{}{
|
||||||
|
"dtype": "F32",
|
||||||
|
"shape": []int{2, 2},
|
||||||
|
"data_offsets": []int{0, 16}, // 4 float32 values = 16 bytes
|
||||||
|
},
|
||||||
|
}
|
||||||
|
headerJSON, err := json.Marshal(header)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to marshal header: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pad header to 8-byte alignment
|
||||||
|
padding := (8 - len(headerJSON)%8) % 8
|
||||||
|
headerJSON = append(headerJSON, bytes.Repeat([]byte(" "), padding)...)
|
||||||
|
|
||||||
|
// Write file
|
||||||
|
f, err := os.Create(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
// Write header size (8 bytes, little endian)
|
||||||
|
if err := binary.Write(f, binary.LittleEndian, uint64(len(headerJSON))); err != nil {
|
||||||
|
t.Fatalf("failed to write header size: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write header
|
||||||
|
if _, err := f.Write(headerJSON); err != nil {
|
||||||
|
t.Fatalf("failed to write header: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write tensor data (16 bytes of zeros for 4 float32 values)
|
||||||
|
if _, err := f.Write(make([]byte, 16)); err != nil {
|
||||||
|
t.Fatalf("failed to write tensor data: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCreateSafetensorsModel runs the full import path against a directory
// containing a config.json and one minimal safetensors file, using mock
// callbacks that record what was created. It verifies the manifest is written
// with the right model name, that config.json becomes the config layer, that
// both a tensor layer and a config layer appear in the manifest, and that
// progress messages were emitted.
func TestCreateSafetensorsModel(t *testing.T) {
	dir := t.TempDir()

	// Create config.json
	configJSON := `{"model_type": "test", "architectures": ["TestModel"]}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}

	// Create a minimal safetensors file
	createMinimalSafetensors(t, filepath.Join(dir, "model.safetensors"))

	// Track what was created
	// NOTE(review): createdLayers is appended to by both callbacks but never
	// asserted on below — it exists only as a side-effect record.
	var createdLayers []LayerInfo
	var manifestWritten bool
	var manifestModelName string
	var manifestConfigLayer LayerInfo
	var manifestLayers []LayerInfo
	var statusMessages []string

	// Mock callbacks
	// createLayer handles config files; it consumes the reader fully and
	// reports the read size as the layer size.
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		data, err := io.ReadAll(r)
		if err != nil {
			return LayerInfo{}, err
		}
		layer := LayerInfo{
			Digest:    "sha256:test",
			Size:      int64(len(data)),
			MediaType: mediaType,
			Name:      name,
		}
		createdLayers = append(createdLayers, layer)
		return layer, nil
	}

	// createTensorLayer handles tensors; one layer per tensor (no quantization
	// in this test since quantize is passed as "").
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		data, err := io.ReadAll(r)
		if err != nil {
			return nil, err
		}
		layer := LayerInfo{
			Digest:    "sha256:tensor_" + name,
			Size:      int64(len(data)),
			MediaType: "application/vnd.ollama.image.tensor",
			Name:      name,
		}
		createdLayers = append(createdLayers, layer)
		return []LayerInfo{layer}, nil
	}

	// writeManifest captures its arguments so they can be asserted after the run.
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
		manifestWritten = true
		manifestModelName = modelName
		manifestConfigLayer = config
		manifestLayers = layers
		return nil
	}

	progressFn := func(status string) {
		statusMessages = append(statusMessages, status)
	}

	// Run CreateSafetensorsModel
	err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
	if err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}

	// Verify manifest was written
	if !manifestWritten {
		t.Error("manifest was not written")
	}

	if manifestModelName != "test-model" {
		t.Errorf("manifest model name = %q, want %q", manifestModelName, "test-model")
	}

	// Verify config layer was set
	if manifestConfigLayer.Name != "config.json" {
		t.Errorf("config layer name = %q, want %q", manifestConfigLayer.Name, "config.json")
	}

	// Verify we have at least one tensor and one config layer
	// ("test_tensor" is the tensor name written by createMinimalSafetensors).
	hasTensor := false
	hasConfig := false
	for _, layer := range manifestLayers {
		if layer.Name == "test_tensor" {
			hasTensor = true
		}
		if layer.Name == "config.json" {
			hasConfig = true
		}
	}

	if !hasTensor {
		t.Error("no tensor layer found in manifest")
	}
	if !hasConfig {
		t.Error("no config layer found in manifest")
	}

	// Verify status messages were sent
	if len(statusMessages) == 0 {
		t.Error("no status messages received")
	}
}
|
||||||
|
|
||||||
|
func TestCreateSafetensorsModel_NoConfigJson(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create only a safetensors file, no config.json
|
||||||
|
createMinimalSafetensors(t, filepath.Join(dir, "model.safetensors"))
|
||||||
|
|
||||||
|
// Mock callbacks (minimal)
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return LayerInfo{Name: name}, nil
|
||||||
|
}
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return []LayerInfo{{Name: name}}, nil
|
||||||
|
}
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for missing config.json, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateSafetensorsModel_EmptyDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Mock callbacks
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
return LayerInfo{}, nil
|
||||||
|
}
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
return []LayerInfo{{}}, nil
|
||||||
|
}
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for empty directory, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateSafetensorsModel_SkipsIndexJson(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create config.json
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{}`), 0o644); err != nil {
|
||||||
|
t.Fatalf("failed to write config.json: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create model.safetensors.index.json (should be skipped)
|
||||||
|
indexJSON := `{"metadata": {"total_size": 100}, "weight_map": {}}`
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "model.safetensors.index.json"), []byte(indexJSON), 0o644); err != nil {
|
||||||
|
t.Fatalf("failed to write index.json: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a minimal safetensors file
|
||||||
|
createMinimalSafetensors(t, filepath.Join(dir, "model.safetensors"))
|
||||||
|
|
||||||
|
var configNames []string
|
||||||
|
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
configNames = append(configNames, name)
|
||||||
|
return LayerInfo{Name: name, Digest: "sha256:test"}, nil
|
||||||
|
}
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return []LayerInfo{{Name: name}}, nil
|
||||||
|
}
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify model.safetensors.index.json was not included
|
||||||
|
for _, name := range configNames {
|
||||||
|
if name == "model.safetensors.index.json" {
|
||||||
|
t.Error("model.safetensors.index.json should have been skipped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveManifestPath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
modelName string
|
||||||
|
wantParts []string // Parts that should appear in the path
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "simple model name",
|
||||||
|
modelName: "llama2",
|
||||||
|
wantParts: []string{"registry.ollama.ai", "library", "llama2", "latest"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model name with tag",
|
||||||
|
modelName: "llama2:7b",
|
||||||
|
wantParts: []string{"registry.ollama.ai", "library", "llama2", "7b"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model name with namespace",
|
||||||
|
modelName: "myuser/mymodel",
|
||||||
|
wantParts: []string{"registry.ollama.ai", "myuser", "mymodel", "latest"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model name with namespace and tag",
|
||||||
|
modelName: "myuser/mymodel:v1",
|
||||||
|
wantParts: []string{"registry.ollama.ai", "myuser", "mymodel", "v1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "fully qualified model name",
|
||||||
|
modelName: "registry.example.com/namespace/model:tag",
|
||||||
|
wantParts: []string{"registry.example.com", "namespace", "model", "tag"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := resolveManifestPath(tt.modelName)
|
||||||
|
|
||||||
|
for _, part := range tt.wantParts {
|
||||||
|
if !strings.Contains(got, part) {
|
||||||
|
t.Errorf("resolveManifestPath(%q) = %q, missing part %q", tt.modelName, got, part)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLayerInfo(t *testing.T) {
|
||||||
|
layer := LayerInfo{
|
||||||
|
Digest: "sha256:abc123",
|
||||||
|
Size: 1024,
|
||||||
|
MediaType: "application/vnd.ollama.image.tensor",
|
||||||
|
Name: "model.weight",
|
||||||
|
}
|
||||||
|
|
||||||
|
if layer.Digest != "sha256:abc123" {
|
||||||
|
t.Errorf("Digest = %q, want %q", layer.Digest, "sha256:abc123")
|
||||||
|
}
|
||||||
|
if layer.Size != 1024 {
|
||||||
|
t.Errorf("Size = %d, want %d", layer.Size, 1024)
|
||||||
|
}
|
||||||
|
if layer.MediaType != "application/vnd.ollama.image.tensor" {
|
||||||
|
t.Errorf("MediaType = %q, want %q", layer.MediaType, "application/vnd.ollama.image.tensor")
|
||||||
|
}
|
||||||
|
if layer.Name != "model.weight" {
|
||||||
|
t.Errorf("Name = %q, want %q", layer.Name, "model.weight")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestModelConfig(t *testing.T) {
|
||||||
|
config := ModelConfig{
|
||||||
|
ModelFormat: "safetensors",
|
||||||
|
Capabilities: []string{"completion", "chat"},
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.ModelFormat != "safetensors" {
|
||||||
|
t.Errorf("ModelFormat = %q, want %q", config.ModelFormat, "safetensors")
|
||||||
|
}
|
||||||
|
if len(config.Capabilities) != 2 {
|
||||||
|
t.Errorf("Capabilities length = %d, want %d", len(config.Capabilities), 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManifest(t *testing.T) {
|
||||||
|
manifest := Manifest{
|
||||||
|
SchemaVersion: 2,
|
||||||
|
MediaType: "application/vnd.oci.image.manifest.v1+json",
|
||||||
|
Config: ManifestLayer{
|
||||||
|
MediaType: "application/vnd.docker.container.image.v1+json",
|
||||||
|
Digest: "sha256:config",
|
||||||
|
Size: 100,
|
||||||
|
},
|
||||||
|
Layers: []ManifestLayer{
|
||||||
|
{
|
||||||
|
MediaType: "application/vnd.ollama.image.tensor",
|
||||||
|
Digest: "sha256:layer1",
|
||||||
|
Size: 1000,
|
||||||
|
Name: "weight.bin",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if manifest.SchemaVersion != 2 {
|
||||||
|
t.Errorf("SchemaVersion = %d, want %d", manifest.SchemaVersion, 2)
|
||||||
|
}
|
||||||
|
if manifest.Config.Digest != "sha256:config" {
|
||||||
|
t.Errorf("Config.Digest = %q, want %q", manifest.Config.Digest, "sha256:config")
|
||||||
|
}
|
||||||
|
if len(manifest.Layers) != 1 {
|
||||||
|
t.Errorf("Layers length = %d, want %d", len(manifest.Layers), 1)
|
||||||
|
}
|
||||||
|
if manifest.Layers[0].Name != "weight.bin" {
|
||||||
|
t.Errorf("Layers[0].Name = %q, want %q", manifest.Layers[0].Name, "weight.bin")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldQuantize(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
tensor string
|
||||||
|
component string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
// VAE component should never be quantized
|
||||||
|
{"vae weight", "decoder.weight", "vae", false},
|
||||||
|
{"vae bias", "decoder.bias", "vae", false},
|
||||||
|
|
||||||
|
// Embeddings should not be quantized
|
||||||
|
{"embedding weight", "embed_tokens.weight", "", false},
|
||||||
|
{"embedding in name", "token_embedding.weight", "", false},
|
||||||
|
|
||||||
|
// Norms should not be quantized
|
||||||
|
{"layer norm", "layer_norm.weight", "", false},
|
||||||
|
{"rms norm", "rms_norm.weight", "", false},
|
||||||
|
{"ln prefix", "ln_1.weight", "", false},
|
||||||
|
{"layernorm in name", "input_layernorm.weight", "", false},
|
||||||
|
|
||||||
|
// Biases should not be quantized
|
||||||
|
{"bias tensor", "attention.bias", "", false},
|
||||||
|
{"proj bias", "o_proj.bias", "", false},
|
||||||
|
|
||||||
|
// Linear weights should be quantized
|
||||||
|
{"linear weight", "q_proj.weight", "", true},
|
||||||
|
{"attention weight", "self_attn.weight", "", true},
|
||||||
|
{"mlp weight", "mlp.gate_proj.weight", "", true},
|
||||||
|
|
||||||
|
// Transformer component weights should be quantized
|
||||||
|
{"transformer weight", "layers.0.weight", "transformer", true},
|
||||||
|
{"text_encoder weight", "encoder.weight", "text_encoder", true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := ShouldQuantize(tt.tensor, tt.component)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("ShouldQuantize(%q, %q) = %v, want %v", tt.tensor, tt.component, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldQuantizeTensor(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
tensor string
|
||||||
|
shape []int32
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
// 2D tensors with sufficient size should be quantized
|
||||||
|
{"large 2D weight", "q_proj.weight", []int32{4096, 4096}, true},
|
||||||
|
{"medium 2D weight", "small_proj.weight", []int32{128, 128}, true},
|
||||||
|
|
||||||
|
// Small tensors should not be quantized (< 1024 elements)
|
||||||
|
{"tiny 2D weight", "tiny.weight", []int32{16, 16}, false},
|
||||||
|
{"small 2D weight", "small.weight", []int32{31, 31}, false},
|
||||||
|
|
||||||
|
// 1D tensors should not be quantized
|
||||||
|
{"1D tensor", "layer_norm.weight", []int32{4096}, false},
|
||||||
|
|
||||||
|
// 3D+ tensors should not be quantized
|
||||||
|
{"3D tensor", "conv.weight", []int32{64, 64, 3}, false},
|
||||||
|
{"4D tensor", "conv2d.weight", []int32{64, 64, 3, 3}, false},
|
||||||
|
|
||||||
|
// Embeddings should not be quantized regardless of shape
|
||||||
|
{"embedding 2D", "embed_tokens.weight", []int32{32000, 4096}, false},
|
||||||
|
|
||||||
|
// Norms should not be quantized regardless of shape
|
||||||
|
{"norm 2D", "layer_norm.weight", []int32{4096, 1}, false},
|
||||||
|
|
||||||
|
// Biases should not be quantized
|
||||||
|
{"bias 2D", "proj.bias", []int32{4096, 1}, false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := ShouldQuantizeTensor(tt.tensor, tt.shape)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("ShouldQuantizeTensor(%q, %v) = %v, want %v", tt.tensor, tt.shape, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateSafetensorsModel_WithQuantize(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create config.json
|
||||||
|
configJSON := `{"model_type": "test", "architectures": ["TestModel"]}`
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
|
||||||
|
t.Fatalf("failed to write config.json: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a minimal safetensors file
|
||||||
|
createMinimalSafetensors(t, filepath.Join(dir, "model.safetensors"))
|
||||||
|
|
||||||
|
var quantizeRequested []string
|
||||||
|
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return LayerInfo{Name: name, Digest: "sha256:test"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
quantizeRequested = append(quantizeRequested, quantize)
|
||||||
|
return []LayerInfo{{Name: name}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
// Run with quantize enabled
|
||||||
|
err := CreateSafetensorsModel("test-model", dir, "fp8", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify quantize was passed to callback (will be false for small test tensor)
|
||||||
|
if len(quantizeRequested) == 0 {
|
||||||
|
t.Error("no tensors processed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createMinimalImageGenModel creates a minimal diffusers-style model directory
|
||||||
|
func createMinimalImageGenModel(t *testing.T, dir string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
// Create model_index.json
|
||||||
|
modelIndex := `{"_class_name": "FluxPipeline", "_diffusers_version": "0.30.0"}`
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "model_index.json"), []byte(modelIndex), 0o644); err != nil {
|
||||||
|
t.Fatalf("failed to write model_index.json: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create transformer directory with a safetensors file
|
||||||
|
transformerDir := filepath.Join(dir, "transformer")
|
||||||
|
if err := os.MkdirAll(transformerDir, 0o755); err != nil {
|
||||||
|
t.Fatalf("failed to create transformer dir: %v", err)
|
||||||
|
}
|
||||||
|
createMinimalSafetensors(t, filepath.Join(transformerDir, "model.safetensors"))
|
||||||
|
|
||||||
|
// Create transformer config
|
||||||
|
transformerConfig := `{"hidden_size": 3072}`
|
||||||
|
if err := os.WriteFile(filepath.Join(transformerDir, "config.json"), []byte(transformerConfig), 0o644); err != nil {
|
||||||
|
t.Fatalf("failed to write transformer config: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateImageGenModel(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createMinimalImageGenModel(t, dir)
|
||||||
|
|
||||||
|
var manifestWritten bool
|
||||||
|
var manifestModelName string
|
||||||
|
var statusMessages []string
|
||||||
|
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return LayerInfo{Name: name, Digest: "sha256:test"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return []LayerInfo{{Name: name, Digest: "sha256:tensor"}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
manifestWritten = true
|
||||||
|
manifestModelName = modelName
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
progressFn := func(status string) {
|
||||||
|
statusMessages = append(statusMessages, status)
|
||||||
|
}
|
||||||
|
|
||||||
|
err := CreateImageGenModel("test-imagegen", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateImageGenModel failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !manifestWritten {
|
||||||
|
t.Error("manifest was not written")
|
||||||
|
}
|
||||||
|
|
||||||
|
if manifestModelName != "test-imagegen" {
|
||||||
|
t.Errorf("manifest model name = %q, want %q", manifestModelName, "test-imagegen")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(statusMessages) == 0 {
|
||||||
|
t.Error("no status messages received")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateImageGenModel_NoModelIndex(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create only transformer without model_index.json
|
||||||
|
transformerDir := filepath.Join(dir, "transformer")
|
||||||
|
if err := os.MkdirAll(transformerDir, 0o755); err != nil {
|
||||||
|
t.Fatalf("failed to create transformer dir: %v", err)
|
||||||
|
}
|
||||||
|
createMinimalSafetensors(t, filepath.Join(transformerDir, "model.safetensors"))
|
||||||
|
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return LayerInfo{Name: name}, nil
|
||||||
|
}
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return []LayerInfo{{Name: name}}, nil
|
||||||
|
}
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
err := CreateImageGenModel("test-imagegen", dir, "", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for missing model_index.json, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateImageGenModel_WithQuantize(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createMinimalImageGenModel(t, dir)
|
||||||
|
|
||||||
|
var quantizeRequested []string
|
||||||
|
|
||||||
|
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
return LayerInfo{Name: name, Digest: "sha256:test"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||||
|
io.ReadAll(r)
|
||||||
|
quantizeRequested = append(quantizeRequested, quantize)
|
||||||
|
return []LayerInfo{{Name: name}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
progressFn := func(status string) {}
|
||||||
|
|
||||||
|
err := CreateImageGenModel("test-imagegen", dir, "fp8", createLayer, createTensorLayer, writeManifest, progressFn)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateImageGenModel failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(quantizeRequested) == 0 {
|
||||||
|
t.Error("no tensors processed")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package imagegen
|
package create
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
@@ -12,40 +12,24 @@ import (
|
|||||||
"github.com/ollama/ollama/x/imagegen/safetensors"
|
"github.com/ollama/ollama/x/imagegen/safetensors"
|
||||||
)
|
)
|
||||||
|
|
||||||
// IsTensorModelDir checks if the directory contains a tensor model
|
// CreateImageGenModel imports an image generation model from a directory.
|
||||||
// by looking for model_index.json, which is the standard diffusers pipeline config.
|
|
||||||
func IsTensorModelDir(dir string) bool {
|
|
||||||
_, err := os.Stat(filepath.Join(dir, "model_index.json"))
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// LayerInfo holds metadata for a created layer.
|
|
||||||
type LayerInfo struct {
|
|
||||||
Digest string
|
|
||||||
Size int64
|
|
||||||
MediaType string
|
|
||||||
Name string // Path-style name: "component/tensor" or "path/to/config.json"
|
|
||||||
}
|
|
||||||
|
|
||||||
// LayerCreator is called to create a blob layer.
|
|
||||||
// name is the path-style name (e.g., "tokenizer/tokenizer.json")
|
|
||||||
type LayerCreator func(r io.Reader, mediaType, name string) (LayerInfo, error)
|
|
||||||
|
|
||||||
// TensorLayerCreator creates a tensor blob layer with metadata.
|
|
||||||
// name is the path-style name including component (e.g., "text_encoder/model.embed_tokens.weight")
|
|
||||||
type TensorLayerCreator func(r io.Reader, name, dtype string, shape []int32) (LayerInfo, error)
|
|
||||||
|
|
||||||
// ManifestWriter writes the manifest file.
|
|
||||||
type ManifestWriter func(modelName string, config LayerInfo, layers []LayerInfo) error
|
|
||||||
|
|
||||||
// CreateModel imports an image generation model from a directory.
|
|
||||||
// Stores each tensor as a separate blob for fine-grained deduplication.
|
// Stores each tensor as a separate blob for fine-grained deduplication.
|
||||||
// If quantize is "fp8", linear weights in transformer/text_encoder are quantized to mxfp8 format.
|
// If quantize is specified, linear weights in transformer/text_encoder are quantized.
|
||||||
|
// Supported quantization types: fp8 (or empty for no quantization).
|
||||||
// Layer creation and manifest writing are done via callbacks to avoid import cycles.
|
// Layer creation and manifest writing are done via callbacks to avoid import cycles.
|
||||||
func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator, createTensorLayer QuantizingTensorLayerCreator, writeManifest ManifestWriter, fn func(status string)) error {
|
func CreateImageGenModel(modelName, modelDir, quantize string, createLayer LayerCreator, createTensorLayer QuantizingTensorLayerCreator, writeManifest ManifestWriter, fn func(status string)) error {
|
||||||
|
// Validate quantization type
|
||||||
|
switch quantize {
|
||||||
|
case "", "fp4", "fp8":
|
||||||
|
// valid
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unsupported quantization type %q: supported types are fp4, fp8", quantize)
|
||||||
|
}
|
||||||
|
|
||||||
var layers []LayerInfo
|
var layers []LayerInfo
|
||||||
var configLayer LayerInfo
|
var configLayer LayerInfo
|
||||||
var totalParams int64 // Count parameters from original tensor shapes
|
var totalParams int64 // Count parameters from original tensor shapes
|
||||||
|
var torchDtype string // Read from component config for quantization display
|
||||||
|
|
||||||
// Components to process - extract individual tensors from each
|
// Components to process - extract individual tensors from each
|
||||||
components := []string{"text_encoder", "transformer", "vae"}
|
components := []string{"text_encoder", "transformer", "vae"}
|
||||||
@@ -77,8 +61,8 @@ func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator,
|
|||||||
|
|
||||||
tensorNames := extractor.ListTensors()
|
tensorNames := extractor.ListTensors()
|
||||||
quantizeMsg := ""
|
quantizeMsg := ""
|
||||||
if quantize == "fp8" && component != "vae" {
|
if quantize != "" && component != "vae" {
|
||||||
quantizeMsg = ", quantizing to fp8"
|
quantizeMsg = ", quantizing to " + quantize
|
||||||
}
|
}
|
||||||
fn(fmt.Sprintf("importing %s/%s (%d tensors%s)", component, entry.Name(), len(tensorNames), quantizeMsg))
|
fn(fmt.Sprintf("importing %s/%s (%d tensors%s)", component, entry.Name(), len(tensorNames), quantizeMsg))
|
||||||
|
|
||||||
@@ -103,11 +87,14 @@ func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator,
|
|||||||
// Use path-style name: "component/tensor_name"
|
// Use path-style name: "component/tensor_name"
|
||||||
fullName := component + "/" + tensorName
|
fullName := component + "/" + tensorName
|
||||||
|
|
||||||
// Determine if this tensor should be quantized
|
// Determine quantization type for this tensor (empty string if not quantizing)
|
||||||
doQuantize := quantize == "fp8" && ShouldQuantize(tensorName, component)
|
quantizeType := ""
|
||||||
|
if quantize != "" && ShouldQuantize(tensorName, component) && canQuantizeShape(td.Shape) {
|
||||||
|
quantizeType = quantize
|
||||||
|
}
|
||||||
|
|
||||||
// createTensorLayer returns multiple layers if quantizing (weight + scales)
|
// createTensorLayer returns multiple layers if quantizing (weight + scales)
|
||||||
newLayers, err := createTensorLayer(td.SafetensorsReader(), fullName, td.Dtype, td.Shape, doQuantize)
|
newLayers, err := createTensorLayer(td.SafetensorsReader(), fullName, td.Dtype, td.Shape, quantizeType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
extractor.Close()
|
extractor.Close()
|
||||||
return fmt.Errorf("failed to create layer for %s: %w", fullName, err)
|
return fmt.Errorf("failed to create layer for %s: %w", fullName, err)
|
||||||
@@ -119,6 +106,19 @@ func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read torch_dtype from text_encoder config for quantization display
|
||||||
|
if torchDtype == "" {
|
||||||
|
textEncoderConfig := filepath.Join(modelDir, "text_encoder/config.json")
|
||||||
|
if data, err := os.ReadFile(textEncoderConfig); err == nil {
|
||||||
|
var cfg struct {
|
||||||
|
TorchDtype string `json:"torch_dtype"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(data, &cfg) == nil && cfg.TorchDtype != "" {
|
||||||
|
torchDtype = cfg.TorchDtype
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Import config files
|
// Import config files
|
||||||
configFiles := []string{
|
configFiles := []string{
|
||||||
"model_index.json",
|
"model_index.json",
|
||||||
@@ -164,11 +164,11 @@ func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator,
|
|||||||
// Add parameter count (counted from tensor shapes during import)
|
// Add parameter count (counted from tensor shapes during import)
|
||||||
cfg["parameter_count"] = totalParams
|
cfg["parameter_count"] = totalParams
|
||||||
|
|
||||||
// Add quantization info
|
// Add quantization info - use quantize type if set, otherwise torch_dtype
|
||||||
if quantize == "fp8" {
|
if quantize != "" {
|
||||||
cfg["quantization"] = "FP8"
|
cfg["quantization"] = strings.ToUpper(quantize)
|
||||||
} else {
|
} else {
|
||||||
cfg["quantization"] = "BF16"
|
cfg["quantization"] = torchDtype
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err = json.MarshalIndent(cfg, "", " ")
|
data, err = json.MarshalIndent(cfg, "", " ")
|
||||||
@@ -211,3 +211,12 @@ func CreateModel(modelName, modelDir, quantize string, createLayer LayerCreator,
|
|||||||
fn(fmt.Sprintf("successfully imported %s with %d layers", modelName, len(layers)))
|
fn(fmt.Sprintf("successfully imported %s with %d layers", modelName, len(layers)))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// canQuantizeShape returns true if a tensor shape is compatible with MLX quantization.
|
||||||
|
// MLX requires the last dimension to be divisible by the group size (32).
|
||||||
|
func canQuantizeShape(shape []int32) bool {
|
||||||
|
if len(shape) < 2 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return shape[len(shape)-1]%32 == 0
|
||||||
|
}
|
||||||
@@ -1,231 +0,0 @@
|
|||||||
package api
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"net/http"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
|
||||||
"github.com/ollama/ollama/llm"
|
|
||||||
"github.com/ollama/ollama/x/imagegen"
|
|
||||||
)
|
|
||||||
|
|
||||||
// RunnerScheduler is the interface for scheduling a model runner.
|
|
||||||
// This is implemented by server.Server to avoid circular imports.
|
|
||||||
type RunnerScheduler interface {
|
|
||||||
ScheduleImageGenRunner(ctx *gin.Context, modelName string, opts api.Options, keepAlive *api.Duration) (llm.LlamaServer, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RegisterRoutes registers the image generation API routes.
|
|
||||||
func RegisterRoutes(r gin.IRouter, scheduler RunnerScheduler) {
|
|
||||||
r.POST("/v1/images/generations", func(c *gin.Context) {
|
|
||||||
ImageGenerationHandler(c, scheduler)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ImageGenerationHandler handles OpenAI-compatible image generation requests.
|
|
||||||
func ImageGenerationHandler(c *gin.Context, scheduler RunnerScheduler) {
|
|
||||||
var req ImageGenerationRequest
|
|
||||||
if err := c.BindJSON(&req); err != nil {
|
|
||||||
c.JSON(http.StatusBadRequest, gin.H{"error": gin.H{"message": err.Error()}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate required fields
|
|
||||||
if req.Model == "" {
|
|
||||||
c.JSON(http.StatusBadRequest, gin.H{"error": gin.H{"message": "model is required"}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if req.Prompt == "" {
|
|
||||||
c.JSON(http.StatusBadRequest, gin.H{"error": gin.H{"message": "prompt is required"}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply defaults
|
|
||||||
if req.N == 0 {
|
|
||||||
req.N = 1
|
|
||||||
}
|
|
||||||
if req.Size == "" {
|
|
||||||
req.Size = "1024x1024"
|
|
||||||
}
|
|
||||||
if req.ResponseFormat == "" {
|
|
||||||
req.ResponseFormat = "b64_json"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify model exists
|
|
||||||
if imagegen.ResolveModelName(req.Model) == "" {
|
|
||||||
c.JSON(http.StatusNotFound, gin.H{"error": gin.H{"message": fmt.Sprintf("model %q not found", req.Model)}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse size
|
|
||||||
width, height := parseSize(req.Size)
|
|
||||||
|
|
||||||
// Build options - we repurpose NumCtx/NumGPU for width/height
|
|
||||||
opts := api.Options{}
|
|
||||||
opts.NumCtx = int(width)
|
|
||||||
opts.NumGPU = int(height)
|
|
||||||
|
|
||||||
// Schedule runner
|
|
||||||
runner, err := scheduler.ScheduleImageGenRunner(c, req.Model, opts, nil)
|
|
||||||
if err != nil {
|
|
||||||
status := http.StatusInternalServerError
|
|
||||||
if strings.Contains(err.Error(), "not found") {
|
|
||||||
status = http.StatusNotFound
|
|
||||||
}
|
|
||||||
c.JSON(status, gin.H{"error": gin.H{"message": err.Error()}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build completion request
|
|
||||||
completionReq := llm.CompletionRequest{
|
|
||||||
Prompt: req.Prompt,
|
|
||||||
Options: &opts,
|
|
||||||
}
|
|
||||||
|
|
||||||
if req.Stream {
|
|
||||||
handleStreamingResponse(c, runner, completionReq, req.ResponseFormat)
|
|
||||||
} else {
|
|
||||||
handleNonStreamingResponse(c, runner, completionReq, req.ResponseFormat)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func handleStreamingResponse(c *gin.Context, runner llm.LlamaServer, req llm.CompletionRequest, format string) {
|
|
||||||
c.Header("Content-Type", "text/event-stream")
|
|
||||||
c.Header("Cache-Control", "no-cache")
|
|
||||||
c.Header("Connection", "keep-alive")
|
|
||||||
|
|
||||||
var imageBase64 string
|
|
||||||
err := runner.Completion(c.Request.Context(), req, func(resp llm.CompletionResponse) {
|
|
||||||
if resp.Done {
|
|
||||||
imageBase64 = extractBase64(resp.Content)
|
|
||||||
} else {
|
|
||||||
progress := parseProgress(resp.Content)
|
|
||||||
if progress.Total > 0 {
|
|
||||||
c.SSEvent("progress", progress)
|
|
||||||
c.Writer.Flush()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
c.SSEvent("error", gin.H{"error": err.Error()})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
c.SSEvent("done", buildResponse(imageBase64, format))
|
|
||||||
}
|
|
||||||
|
|
||||||
func handleNonStreamingResponse(c *gin.Context, runner llm.LlamaServer, req llm.CompletionRequest, format string) {
|
|
||||||
var imageBase64 string
|
|
||||||
err := runner.Completion(c.Request.Context(), req, func(resp llm.CompletionResponse) {
|
|
||||||
if resp.Done {
|
|
||||||
imageBase64 = extractBase64(resp.Content)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": gin.H{"message": err.Error()}})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
c.JSON(http.StatusOK, buildResponse(imageBase64, format))
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseSize(size string) (int32, int32) {
|
|
||||||
parts := strings.Split(size, "x")
|
|
||||||
if len(parts) != 2 {
|
|
||||||
return 1024, 1024
|
|
||||||
}
|
|
||||||
w, _ := strconv.Atoi(parts[0])
|
|
||||||
h, _ := strconv.Atoi(parts[1])
|
|
||||||
if w == 0 {
|
|
||||||
w = 1024
|
|
||||||
}
|
|
||||||
if h == 0 {
|
|
||||||
h = 1024
|
|
||||||
}
|
|
||||||
return int32(w), int32(h)
|
|
||||||
}
|
|
||||||
|
|
||||||
func extractBase64(content string) string {
|
|
||||||
if strings.HasPrefix(content, "IMAGE_BASE64:") {
|
|
||||||
return content[13:]
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseProgress(content string) ImageProgressEvent {
|
|
||||||
var step, total int
|
|
||||||
fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total)
|
|
||||||
return ImageProgressEvent{Step: step, Total: total}
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildResponse(imageBase64, format string) ImageGenerationResponse {
|
|
||||||
resp := ImageGenerationResponse{
|
|
||||||
Created: time.Now().Unix(),
|
|
||||||
Data: make([]ImageData, 1),
|
|
||||||
}
|
|
||||||
|
|
||||||
if imageBase64 == "" {
|
|
||||||
return resp
|
|
||||||
}
|
|
||||||
|
|
||||||
if format == "url" {
|
|
||||||
// URL format not supported when using base64 transfer
|
|
||||||
resp.Data[0].B64JSON = imageBase64
|
|
||||||
} else {
|
|
||||||
resp.Data[0].B64JSON = imageBase64
|
|
||||||
}
|
|
||||||
|
|
||||||
return resp
|
|
||||||
}
|
|
||||||
|
|
||||||
// HandleGenerateRequest handles Ollama /api/generate requests for image gen models.
|
|
||||||
// This allows routes.go to delegate image generation with minimal code.
|
|
||||||
func HandleGenerateRequest(c *gin.Context, scheduler RunnerScheduler, modelName, prompt string, keepAlive *api.Duration, streamFn func(c *gin.Context, ch chan any)) {
|
|
||||||
opts := api.Options{}
|
|
||||||
|
|
||||||
// Schedule runner
|
|
||||||
runner, err := scheduler.ScheduleImageGenRunner(c, modelName, opts, keepAlive)
|
|
||||||
if err != nil {
|
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build completion request
|
|
||||||
completionReq := llm.CompletionRequest{
|
|
||||||
Prompt: prompt,
|
|
||||||
Options: &opts,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stream responses via channel
|
|
||||||
ch := make(chan any)
|
|
||||||
go func() {
|
|
||||||
defer close(ch)
|
|
||||||
err := runner.Completion(c.Request.Context(), completionReq, func(resp llm.CompletionResponse) {
|
|
||||||
ch <- GenerateResponse{
|
|
||||||
Model: modelName,
|
|
||||||
CreatedAt: time.Now().UTC(),
|
|
||||||
Response: resp.Content,
|
|
||||||
Done: resp.Done,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
// Log error but don't block - channel is already being consumed
|
|
||||||
_ = err
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
streamFn(c, ch)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GenerateResponse matches api.GenerateResponse structure for streaming.
|
|
||||||
type GenerateResponse struct {
|
|
||||||
Model string `json:"model"`
|
|
||||||
CreatedAt time.Time `json:"created_at"`
|
|
||||||
Response string `json:"response"`
|
|
||||||
Done bool `json:"done"`
|
|
||||||
}
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
// Package api provides OpenAI-compatible image generation API types.
|
|
||||||
package api
|
|
||||||
|
|
||||||
// ImageGenerationRequest is an OpenAI-compatible image generation request.
|
|
||||||
type ImageGenerationRequest struct {
|
|
||||||
Model string `json:"model"`
|
|
||||||
Prompt string `json:"prompt"`
|
|
||||||
N int `json:"n,omitempty"`
|
|
||||||
Size string `json:"size,omitempty"`
|
|
||||||
ResponseFormat string `json:"response_format,omitempty"`
|
|
||||||
Stream bool `json:"stream,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ImageGenerationResponse is an OpenAI-compatible image generation response.
|
|
||||||
type ImageGenerationResponse struct {
|
|
||||||
Created int64 `json:"created"`
|
|
||||||
Data []ImageData `json:"data"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ImageData contains the generated image data.
|
|
||||||
type ImageData struct {
|
|
||||||
URL string `json:"url,omitempty"`
|
|
||||||
B64JSON string `json:"b64_json,omitempty"`
|
|
||||||
RevisedPrompt string `json:"revised_prompt,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ImageProgressEvent is sent during streaming to indicate generation progress.
|
|
||||||
type ImageProgressEvent struct {
|
|
||||||
Step int `json:"step"`
|
|
||||||
Total int `json:"total"`
|
|
||||||
}
|
|
||||||
@@ -7,7 +7,6 @@ package imagegen
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -39,79 +38,20 @@ func DefaultOptions() ImageGenOptions {
|
|||||||
return ImageGenOptions{
|
return ImageGenOptions{
|
||||||
Width: 1024,
|
Width: 1024,
|
||||||
Height: 1024,
|
Height: 1024,
|
||||||
Steps: 9,
|
Steps: 0, // 0 means model default
|
||||||
Seed: 0, // 0 means random
|
Seed: 0, // 0 means random
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ModelInfo contains metadata about an image generation model.
|
|
||||||
type ModelInfo struct {
|
|
||||||
Architecture string
|
|
||||||
ParameterCount int64
|
|
||||||
Quantization string
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetModelInfo returns metadata about an image generation model.
|
|
||||||
func GetModelInfo(modelName string) (*ModelInfo, error) {
|
|
||||||
manifest, err := LoadManifest(modelName)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to load manifest: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
info := &ModelInfo{}
|
|
||||||
|
|
||||||
// Read model_index.json for architecture, parameter count, and quantization
|
|
||||||
if data, err := manifest.ReadConfig("model_index.json"); err == nil {
|
|
||||||
var index struct {
|
|
||||||
Architecture string `json:"architecture"`
|
|
||||||
ParameterCount int64 `json:"parameter_count"`
|
|
||||||
Quantization string `json:"quantization"`
|
|
||||||
}
|
|
||||||
if json.Unmarshal(data, &index) == nil {
|
|
||||||
info.Architecture = index.Architecture
|
|
||||||
info.ParameterCount = index.ParameterCount
|
|
||||||
info.Quantization = index.Quantization
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: detect quantization from tensor names if not in config
|
|
||||||
if info.Quantization == "" {
|
|
||||||
for _, layer := range manifest.Manifest.Layers {
|
|
||||||
if strings.HasSuffix(layer.Name, ".weight_scale") {
|
|
||||||
info.Quantization = "FP8"
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if info.Quantization == "" {
|
|
||||||
info.Quantization = "BF16"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: estimate parameter count if not in config
|
|
||||||
if info.ParameterCount == 0 {
|
|
||||||
var totalSize int64
|
|
||||||
for _, layer := range manifest.Manifest.Layers {
|
|
||||||
if layer.MediaType == "application/vnd.ollama.image.tensor" {
|
|
||||||
if !strings.HasSuffix(layer.Name, "_scale") && !strings.HasSuffix(layer.Name, "_qbias") {
|
|
||||||
totalSize += layer.Size
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Assume BF16 (2 bytes/param) as rough estimate
|
|
||||||
info.ParameterCount = totalSize / 2
|
|
||||||
}
|
|
||||||
|
|
||||||
return info, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// RegisterFlags adds image generation flags to the given command.
|
// RegisterFlags adds image generation flags to the given command.
|
||||||
// Flags are hidden since they only apply to image generation models.
|
// Flags are hidden since they only apply to image generation models.
|
||||||
func RegisterFlags(cmd *cobra.Command) {
|
func RegisterFlags(cmd *cobra.Command) {
|
||||||
cmd.Flags().Int("width", 1024, "Image width")
|
cmd.Flags().Int("width", 1024, "Image width")
|
||||||
cmd.Flags().Int("height", 1024, "Image height")
|
cmd.Flags().Int("height", 1024, "Image height")
|
||||||
cmd.Flags().Int("steps", 9, "Denoising steps")
|
cmd.Flags().Int("steps", 0, "Denoising steps (0 = model default)")
|
||||||
cmd.Flags().Int("seed", 0, "Random seed (0 for random)")
|
cmd.Flags().Int("seed", 0, "Random seed (0 for random)")
|
||||||
cmd.Flags().String("negative", "", "Negative prompt")
|
cmd.Flags().String("negative", "", "Negative prompt")
|
||||||
|
// Hide from main flags section - shown in separate section via AppendFlagsDocs
|
||||||
cmd.Flags().MarkHidden("width")
|
cmd.Flags().MarkHidden("width")
|
||||||
cmd.Flags().MarkHidden("height")
|
cmd.Flags().MarkHidden("height")
|
||||||
cmd.Flags().MarkHidden("steps")
|
cmd.Flags().MarkHidden("steps")
|
||||||
@@ -119,6 +59,19 @@ func RegisterFlags(cmd *cobra.Command) {
|
|||||||
cmd.Flags().MarkHidden("negative")
|
cmd.Flags().MarkHidden("negative")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AppendFlagsDocs appends image generation flags documentation to the command's usage template.
|
||||||
|
func AppendFlagsDocs(cmd *cobra.Command) {
|
||||||
|
usage := `
|
||||||
|
Image Generation Flags (experimental):
|
||||||
|
--width int Image width
|
||||||
|
--height int Image height
|
||||||
|
--steps int Denoising steps
|
||||||
|
--seed int Random seed
|
||||||
|
--negative str Negative prompt
|
||||||
|
`
|
||||||
|
cmd.SetUsageTemplate(cmd.UsageTemplate() + usage)
|
||||||
|
}
|
||||||
|
|
||||||
// RunCLI handles the CLI for image generation models.
|
// RunCLI handles the CLI for image generation models.
|
||||||
// Returns true if it handled the request, false if the caller should continue with normal flow.
|
// Returns true if it handled the request, false if the caller should continue with normal flow.
|
||||||
// Supports flags: --width, --height, --steps, --seed, --negative
|
// Supports flags: --width, --height, --steps, --seed, --negative
|
||||||
@@ -158,17 +111,15 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build request with image gen options encoded in Options fields
|
|
||||||
// NumCtx=width, NumGPU=height, NumPredict=steps, Seed=seed
|
|
||||||
req := &api.GenerateRequest{
|
req := &api.GenerateRequest{
|
||||||
Model: modelName,
|
Model: modelName,
|
||||||
Prompt: prompt,
|
Prompt: prompt,
|
||||||
Options: map[string]any{
|
Width: int32(opts.Width),
|
||||||
"num_ctx": opts.Width,
|
Height: int32(opts.Height),
|
||||||
"num_gpu": opts.Height,
|
Steps: int32(opts.Steps),
|
||||||
"num_predict": opts.Steps,
|
}
|
||||||
"seed": opts.Seed,
|
if opts.Seed != 0 {
|
||||||
},
|
req.Options = map[string]any{"seed": opts.Seed}
|
||||||
}
|
}
|
||||||
if keepAlive != nil {
|
if keepAlive != nil {
|
||||||
req.KeepAlive = keepAlive
|
req.KeepAlive = keepAlive
|
||||||
@@ -182,32 +133,25 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
|
|||||||
var stepBar *progress.StepBar
|
var stepBar *progress.StepBar
|
||||||
var imageBase64 string
|
var imageBase64 string
|
||||||
err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
|
err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
|
||||||
content := resp.Response
|
// Handle progress updates using structured fields
|
||||||
|
if resp.Total > 0 {
|
||||||
// Handle progress updates - parse step info and switch to step bar
|
if stepBar == nil {
|
||||||
if strings.HasPrefix(content, "\rGenerating:") {
|
|
||||||
var step, total int
|
|
||||||
fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total)
|
|
||||||
if stepBar == nil && total > 0 {
|
|
||||||
spinner.Stop()
|
spinner.Stop()
|
||||||
stepBar = progress.NewStepBar("Generating", total)
|
stepBar = progress.NewStepBar("Generating", int(resp.Total))
|
||||||
p.Add("", stepBar)
|
p.Add("", stepBar)
|
||||||
}
|
}
|
||||||
if stepBar != nil {
|
stepBar.Set(int(resp.Completed))
|
||||||
stepBar.Set(step)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle final response with base64 image data
|
// Handle final response with image data
|
||||||
if resp.Done && strings.HasPrefix(content, "IMAGE_BASE64:") {
|
if resp.Done && resp.Image != "" {
|
||||||
imageBase64 = content[13:]
|
imageBase64 = resp.Image
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
p.Stop()
|
p.StopAndClear()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -245,6 +189,23 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preload the model with the specified keepalive
|
||||||
|
p := progress.NewProgress(os.Stderr)
|
||||||
|
spinner := progress.NewSpinner("")
|
||||||
|
p.Add("", spinner)
|
||||||
|
|
||||||
|
preloadReq := &api.GenerateRequest{
|
||||||
|
Model: modelName,
|
||||||
|
KeepAlive: keepAlive,
|
||||||
|
}
|
||||||
|
if err := client.Generate(cmd.Context(), preloadReq, func(resp api.GenerateResponse) error {
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
p.StopAndClear()
|
||||||
|
return fmt.Errorf("failed to load model: %w", err)
|
||||||
|
}
|
||||||
|
p.StopAndClear()
|
||||||
|
|
||||||
scanner, err := readline.New(readline.Prompt{
|
scanner, err := readline.New(readline.Prompt{
|
||||||
Prompt: ">>> ",
|
Prompt: ">>> ",
|
||||||
Placeholder: "Describe an image to generate (/help for commands)",
|
Placeholder: "Describe an image to generate (/help for commands)",
|
||||||
@@ -282,7 +243,7 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
|
|||||||
case strings.HasPrefix(line, "/bye"):
|
case strings.HasPrefix(line, "/bye"):
|
||||||
return nil
|
return nil
|
||||||
case strings.HasPrefix(line, "/?"), strings.HasPrefix(line, "/help"):
|
case strings.HasPrefix(line, "/?"), strings.HasPrefix(line, "/help"):
|
||||||
printInteractiveHelp(opts)
|
printInteractiveHelp()
|
||||||
continue
|
continue
|
||||||
case strings.HasPrefix(line, "/set "):
|
case strings.HasPrefix(line, "/set "):
|
||||||
if err := handleSetCommand(line[5:], &opts); err != nil {
|
if err := handleSetCommand(line[5:], &opts); err != nil {
|
||||||
@@ -301,12 +262,12 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
|
|||||||
req := &api.GenerateRequest{
|
req := &api.GenerateRequest{
|
||||||
Model: modelName,
|
Model: modelName,
|
||||||
Prompt: line,
|
Prompt: line,
|
||||||
Options: map[string]any{
|
Width: int32(opts.Width),
|
||||||
"num_ctx": opts.Width,
|
Height: int32(opts.Height),
|
||||||
"num_gpu": opts.Height,
|
Steps: int32(opts.Steps),
|
||||||
"num_predict": opts.Steps,
|
}
|
||||||
"seed": opts.Seed,
|
if opts.Seed != 0 {
|
||||||
},
|
req.Options = map[string]any{"seed": opts.Seed}
|
||||||
}
|
}
|
||||||
if keepAlive != nil {
|
if keepAlive != nil {
|
||||||
req.KeepAlive = keepAlive
|
req.KeepAlive = keepAlive
|
||||||
@@ -321,32 +282,25 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
|
|||||||
var imageBase64 string
|
var imageBase64 string
|
||||||
|
|
||||||
err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
|
err = client.Generate(cmd.Context(), req, func(resp api.GenerateResponse) error {
|
||||||
content := resp.Response
|
// Handle progress updates using structured fields
|
||||||
|
if resp.Total > 0 {
|
||||||
// Handle progress updates - parse step info and switch to step bar
|
if stepBar == nil {
|
||||||
if strings.HasPrefix(content, "\rGenerating:") {
|
|
||||||
var step, total int
|
|
||||||
fmt.Sscanf(content, "\rGenerating: step %d/%d", &step, &total)
|
|
||||||
if stepBar == nil && total > 0 {
|
|
||||||
spinner.Stop()
|
spinner.Stop()
|
||||||
stepBar = progress.NewStepBar("Generating", total)
|
stepBar = progress.NewStepBar("Generating", int(resp.Total))
|
||||||
p.Add("", stepBar)
|
p.Add("", stepBar)
|
||||||
}
|
}
|
||||||
if stepBar != nil {
|
stepBar.Set(int(resp.Completed))
|
||||||
stepBar.Set(step)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle final response with base64 image data
|
// Handle final response with image data
|
||||||
if resp.Done && strings.HasPrefix(content, "IMAGE_BASE64:") {
|
if resp.Done && resp.Image != "" {
|
||||||
imageBase64 = content[13:]
|
imageBase64 = resp.Image
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
p.Stop()
|
p.StopAndClear()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||||
continue
|
continue
|
||||||
@@ -397,12 +351,13 @@ func sanitizeFilename(s string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// printInteractiveHelp prints help for interactive mode commands.
|
// printInteractiveHelp prints help for interactive mode commands.
|
||||||
func printInteractiveHelp(opts ImageGenOptions) {
|
// TODO: reconcile /set commands with /set parameter in text gen REPL (cmd/cmd.go)
|
||||||
|
func printInteractiveHelp() {
|
||||||
fmt.Fprintln(os.Stderr, "Commands:")
|
fmt.Fprintln(os.Stderr, "Commands:")
|
||||||
fmt.Fprintln(os.Stderr, " /set width <n> Set image width (current:", opts.Width, ")")
|
fmt.Fprintln(os.Stderr, " /set width <n> Set image width")
|
||||||
fmt.Fprintln(os.Stderr, " /set height <n> Set image height (current:", opts.Height, ")")
|
fmt.Fprintln(os.Stderr, " /set height <n> Set image height")
|
||||||
fmt.Fprintln(os.Stderr, " /set steps <n> Set denoising steps (current:", opts.Steps, ")")
|
fmt.Fprintln(os.Stderr, " /set steps <n> Set denoising steps")
|
||||||
fmt.Fprintln(os.Stderr, " /set seed <n> Set random seed (current:", opts.Seed, ", 0=random)")
|
fmt.Fprintln(os.Stderr, " /set seed <n> Set random seed")
|
||||||
fmt.Fprintln(os.Stderr, " /set negative <s> Set negative prompt")
|
fmt.Fprintln(os.Stderr, " /set negative <s> Set negative prompt")
|
||||||
fmt.Fprintln(os.Stderr, " /show Show current settings")
|
fmt.Fprintln(os.Stderr, " /show Show current settings")
|
||||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||||
|
|||||||
@@ -1,190 +0,0 @@
|
|||||||
// Package client provides client-side model creation for tensor-based models.
|
|
||||||
//
|
|
||||||
// This package is in x/ because the tensor model storage format is under development.
|
|
||||||
// It also exists to break an import cycle: server imports x/imagegen, so x/imagegen
|
|
||||||
// cannot import server. This sub-package can import server because server doesn't
|
|
||||||
// import it.
|
|
||||||
//
|
|
||||||
// TODO (jmorganca): This is temporary. When tensor models are promoted to production:
|
|
||||||
// 1. Add proper API endpoints for tensor model creation
|
|
||||||
// 2. Move tensor extraction to server-side
|
|
||||||
// 3. Remove this package
|
|
||||||
// 4. Follow the same client→server pattern as regular model creation
|
|
||||||
package client
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"github.com/ollama/ollama/progress"
|
|
||||||
"github.com/ollama/ollama/server"
|
|
||||||
"github.com/ollama/ollama/types/model"
|
|
||||||
"github.com/ollama/ollama/x/imagegen"
|
|
||||||
)
|
|
||||||
|
|
||||||
// MinOllamaVersion is the minimum Ollama version required for image generation models.
|
|
||||||
const MinOllamaVersion = "0.14.0"
|
|
||||||
|
|
||||||
// CreateModel imports a tensor-based model from a local directory.
|
|
||||||
// This creates blobs and manifest directly on disk, bypassing the HTTP API.
|
|
||||||
// If quantize is "fp8", weights will be quantized to mxfp8 format during import.
|
|
||||||
//
|
|
||||||
// TODO (jmorganca): Replace with API-based creation when promoted to production.
|
|
||||||
func CreateModel(modelName, modelDir, quantize string, p *progress.Progress) error {
|
|
||||||
if !imagegen.IsTensorModelDir(modelDir) {
|
|
||||||
return fmt.Errorf("%s is not an image generation model directory (model_index.json not found)", modelDir)
|
|
||||||
}
|
|
||||||
|
|
||||||
status := "importing image generation model"
|
|
||||||
spinner := progress.NewSpinner(status)
|
|
||||||
p.Add("imagegen", spinner)
|
|
||||||
|
|
||||||
// Create layer callback for config files
|
|
||||||
createLayer := func(r io.Reader, mediaType, name string) (imagegen.LayerInfo, error) {
|
|
||||||
layer, err := server.NewLayer(r, mediaType)
|
|
||||||
if err != nil {
|
|
||||||
return imagegen.LayerInfo{}, err
|
|
||||||
}
|
|
||||||
layer.Name = name
|
|
||||||
|
|
||||||
return imagegen.LayerInfo{
|
|
||||||
Digest: layer.Digest,
|
|
||||||
Size: layer.Size,
|
|
||||||
MediaType: layer.MediaType,
|
|
||||||
Name: name,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create tensor layer callback for individual tensors
|
|
||||||
// name is path-style: "component/tensor_name"
|
|
||||||
// When quantize is true, returns multiple layers (weight + scales)
|
|
||||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, doQuantize bool) ([]imagegen.LayerInfo, error) {
|
|
||||||
if doQuantize {
|
|
||||||
// Check if quantization is supported
|
|
||||||
if !QuantizeSupported() {
|
|
||||||
return nil, fmt.Errorf("quantization requires MLX support")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Quantize the tensor (affine mode returns weight, scales, qbiases)
|
|
||||||
qweightData, scalesData, qbiasData, _, _, _, err := quantizeTensor(r, name, dtype, shape)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to quantize %s: %w", name, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create layer for quantized weight
|
|
||||||
weightLayer, err := server.NewLayer(bytes.NewReader(qweightData), server.MediaTypeImageTensor)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create layer for scales (use _scale suffix convention)
|
|
||||||
scalesLayer, err := server.NewLayer(bytes.NewReader(scalesData), server.MediaTypeImageTensor)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
layers := []imagegen.LayerInfo{
|
|
||||||
{
|
|
||||||
Digest: weightLayer.Digest,
|
|
||||||
Size: weightLayer.Size,
|
|
||||||
MediaType: weightLayer.MediaType,
|
|
||||||
Name: name, // Keep original name for weight
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: scalesLayer.Digest,
|
|
||||||
Size: scalesLayer.Size,
|
|
||||||
MediaType: scalesLayer.MediaType,
|
|
||||||
Name: name + "_scale", // Add _scale suffix
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add qbiases layer if present (affine mode)
|
|
||||||
if qbiasData != nil {
|
|
||||||
qbiasLayer, err := server.NewLayer(bytes.NewReader(qbiasData), server.MediaTypeImageTensor)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
layers = append(layers, imagegen.LayerInfo{
|
|
||||||
Digest: qbiasLayer.Digest,
|
|
||||||
Size: qbiasLayer.Size,
|
|
||||||
MediaType: qbiasLayer.MediaType,
|
|
||||||
Name: name + "_qbias", // Add _qbias suffix
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return layers, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Non-quantized path: just create a single layer
|
|
||||||
layer, err := server.NewLayer(r, server.MediaTypeImageTensor)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return []imagegen.LayerInfo{
|
|
||||||
{
|
|
||||||
Digest: layer.Digest,
|
|
||||||
Size: layer.Size,
|
|
||||||
MediaType: layer.MediaType,
|
|
||||||
Name: name,
|
|
||||||
},
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create manifest writer callback
|
|
||||||
writeManifest := func(modelName string, config imagegen.LayerInfo, layers []imagegen.LayerInfo) error {
|
|
||||||
name := model.ParseName(modelName)
|
|
||||||
if !name.IsValid() {
|
|
||||||
return fmt.Errorf("invalid model name: %s", modelName)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a proper config blob with version requirement
|
|
||||||
configData := model.ConfigV2{
|
|
||||||
ModelFormat: "safetensors",
|
|
||||||
Capabilities: []string{"image"},
|
|
||||||
Requires: MinOllamaVersion,
|
|
||||||
}
|
|
||||||
configJSON, err := json.Marshal(configData)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to marshal config: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create config layer blob
|
|
||||||
configLayer, err := server.NewLayer(bytes.NewReader(configJSON), "application/vnd.docker.container.image.v1+json")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create config layer: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert LayerInfo to server.Layer (include the original model_index.json in layers)
|
|
||||||
serverLayers := make([]server.Layer, len(layers))
|
|
||||||
for i, l := range layers {
|
|
||||||
serverLayers[i] = server.Layer{
|
|
||||||
MediaType: l.MediaType,
|
|
||||||
Digest: l.Digest,
|
|
||||||
Size: l.Size,
|
|
||||||
Name: l.Name,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return server.WriteManifest(name, configLayer, serverLayers)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Progress callback
|
|
||||||
progressFn := func(msg string) {
|
|
||||||
spinner.Stop()
|
|
||||||
status = msg
|
|
||||||
spinner = progress.NewSpinner(status)
|
|
||||||
p.Add("imagegen", spinner)
|
|
||||||
}
|
|
||||||
|
|
||||||
err := imagegen.CreateModel(modelName, modelDir, quantize, createLayer, createTensorLayer, writeManifest, progressFn)
|
|
||||||
spinner.Stop()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("Created image generation model '%s'\n", modelName)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -65,12 +65,12 @@ func (s *utf8Streamer) Flush() string {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
|
||||||
generationStream = mlx.NewStream()
|
|
||||||
}
|
|
||||||
|
|
||||||
// withStream runs fn with the generation stream as default
|
// withStream runs fn with the generation stream as default
|
||||||
func withStream(fn func()) {
|
func withStream(fn func()) {
|
||||||
|
// Lazy initialization of generationStream
|
||||||
|
if generationStream == nil {
|
||||||
|
generationStream = mlx.NewStream()
|
||||||
|
}
|
||||||
orig := mlx.GetDefaultStream()
|
orig := mlx.GetDefaultStream()
|
||||||
mlx.SetDefaultStream(generationStream)
|
mlx.SetDefaultStream(generationStream)
|
||||||
fn()
|
fn()
|
||||||
|
|||||||
@@ -7,12 +7,17 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"image"
|
||||||
|
_ "image/jpeg"
|
||||||
|
_ "image/png"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime/pprof"
|
"runtime/pprof"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/x/imagegen"
|
||||||
"github.com/ollama/ollama/x/imagegen/mlx"
|
"github.com/ollama/ollama/x/imagegen/mlx"
|
||||||
|
"github.com/ollama/ollama/x/imagegen/models/flux2"
|
||||||
"github.com/ollama/ollama/x/imagegen/models/gemma3"
|
"github.com/ollama/ollama/x/imagegen/models/gemma3"
|
||||||
"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
|
"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
|
||||||
"github.com/ollama/ollama/x/imagegen/models/llama"
|
"github.com/ollama/ollama/x/imagegen/models/llama"
|
||||||
@@ -46,9 +51,9 @@ func main() {
|
|||||||
imagePath := flag.String("image", "", "Image path for multimodal models")
|
imagePath := flag.String("image", "", "Image path for multimodal models")
|
||||||
|
|
||||||
// Image generation params
|
// Image generation params
|
||||||
width := flag.Int("width", 1024, "Image width")
|
width := flag.Int("width", 0, "Image width (0 = auto from input or 1024)")
|
||||||
height := flag.Int("height", 1024, "Image height")
|
height := flag.Int("height", 0, "Image height (0 = auto from input or 1024)")
|
||||||
steps := flag.Int("steps", 9, "Denoising steps")
|
steps := flag.Int("steps", 0, "Denoising steps (0 = model default)")
|
||||||
seed := flag.Int64("seed", 42, "Random seed")
|
seed := flag.Int64("seed", 42, "Random seed")
|
||||||
out := flag.String("output", "output.png", "Output path")
|
out := flag.String("output", "output.png", "Output path")
|
||||||
|
|
||||||
@@ -61,6 +66,7 @@ func main() {
|
|||||||
|
|
||||||
// Legacy mode flags
|
// Legacy mode flags
|
||||||
zimageFlag := flag.Bool("zimage", false, "Z-Image generation")
|
zimageFlag := flag.Bool("zimage", false, "Z-Image generation")
|
||||||
|
flux2Flag := flag.Bool("flux2", false, "FLUX.2 Klein generation")
|
||||||
qwenImage := flag.Bool("qwen-image", false, "Qwen-Image text-to-image generation")
|
qwenImage := flag.Bool("qwen-image", false, "Qwen-Image text-to-image generation")
|
||||||
qwenImageEdit := flag.Bool("qwen-image-edit", false, "Qwen-Image-Edit image editing")
|
qwenImageEdit := flag.Bool("qwen-image-edit", false, "Qwen-Image-Edit image editing")
|
||||||
var inputImages stringSlice
|
var inputImages stringSlice
|
||||||
@@ -78,6 +84,11 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if MLX initialized successfully
|
||||||
|
if !mlx.IsMLXAvailable() {
|
||||||
|
log.Fatalf("MLX initialization failed: %v", mlx.GetMLXInitError())
|
||||||
|
}
|
||||||
|
|
||||||
// CPU profiling
|
// CPU profiling
|
||||||
if *cpuProfile != "" {
|
if *cpuProfile != "" {
|
||||||
f, err := os.Create(*cpuProfile)
|
f, err := os.Create(*cpuProfile)
|
||||||
@@ -117,6 +128,44 @@ func main() {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
err = saveImageArray(img, *out)
|
err = saveImageArray(img, *out)
|
||||||
}
|
}
|
||||||
|
case *flux2Flag:
|
||||||
|
m := &flux2.Model{}
|
||||||
|
if loadErr := m.Load(*modelPath); loadErr != nil {
|
||||||
|
log.Fatal(loadErr)
|
||||||
|
}
|
||||||
|
// Load input images with EXIF orientation correction
|
||||||
|
var loadedImages []image.Image
|
||||||
|
for _, path := range inputImages {
|
||||||
|
img, loadErr := loadImageWithEXIF(path)
|
||||||
|
if loadErr != nil {
|
||||||
|
log.Fatalf("Failed to load image %s: %v", path, loadErr)
|
||||||
|
}
|
||||||
|
loadedImages = append(loadedImages, img)
|
||||||
|
}
|
||||||
|
// When input images provided and user didn't override dimensions, use 0 to match input
|
||||||
|
fluxWidth := int32(*width)
|
||||||
|
fluxHeight := int32(*height)
|
||||||
|
if len(loadedImages) > 0 && *width == 0 && *height == 0 {
|
||||||
|
// Both unset, will auto-detect from input
|
||||||
|
} else if len(loadedImages) > 0 && *width == 0 {
|
||||||
|
fluxWidth = 0 // Compute from height + aspect ratio
|
||||||
|
} else if len(loadedImages) > 0 && *height == 0 {
|
||||||
|
fluxHeight = 0 // Compute from width + aspect ratio
|
||||||
|
}
|
||||||
|
var img *mlx.Array
|
||||||
|
img, err = m.GenerateFromConfig(context.Background(), &flux2.GenerateConfig{
|
||||||
|
Prompt: *prompt,
|
||||||
|
Width: fluxWidth,
|
||||||
|
Height: fluxHeight,
|
||||||
|
Steps: *steps,
|
||||||
|
GuidanceScale: float32(*cfgScale),
|
||||||
|
Seed: *seed,
|
||||||
|
CapturePath: *gpuCapture,
|
||||||
|
InputImages: loadedImages,
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
err = saveImageArray(img, *out)
|
||||||
|
}
|
||||||
case *qwenImage:
|
case *qwenImage:
|
||||||
m, loadErr := qwen_image.LoadPersistent(*modelPath)
|
m, loadErr := qwen_image.LoadPersistent(*modelPath)
|
||||||
if loadErr != nil {
|
if loadErr != nil {
|
||||||
@@ -271,6 +320,8 @@ func detectModelKind(modelPath string) (string, error) {
|
|||||||
switch index.ClassName {
|
switch index.ClassName {
|
||||||
case "FluxPipeline", "ZImagePipeline":
|
case "FluxPipeline", "ZImagePipeline":
|
||||||
return "zimage", nil
|
return "zimage", nil
|
||||||
|
case "Flux2KleinPipeline":
|
||||||
|
return "flux2", nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "zimage", nil
|
return "zimage", nil
|
||||||
@@ -291,3 +342,12 @@ func detectModelKind(modelPath string) (string, error) {
|
|||||||
|
|
||||||
return cfg.ModelType, nil
|
return cfg.ModelType, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadImageWithEXIF loads an image from a file path with EXIF orientation correction.
|
||||||
|
func loadImageWithEXIF(path string) (image.Image, error) {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read file: %w", err)
|
||||||
|
}
|
||||||
|
return imagegen.DecodeImage(data)
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"fmt"
|
"fmt"
|
||||||
"image"
|
"image"
|
||||||
|
_ "image/jpeg"
|
||||||
"image/png"
|
"image/png"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -108,3 +109,160 @@ func clampF(v, min, max float32) float32 {
|
|||||||
}
|
}
|
||||||
return v
|
return v
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DecodeImage decodes image bytes with EXIF orientation applied.
|
||||||
|
func DecodeImage(data []byte) (image.Image, error) {
|
||||||
|
orientation := readJPEGOrientation(data)
|
||||||
|
|
||||||
|
img, _, err := image.Decode(bytes.NewReader(data))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return applyOrientation(img, orientation), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readJPEGOrientation extracts EXIF orientation from JPEG bytes.
|
||||||
|
// Returns 1 (normal) for non-JPEG or if orientation not found.
|
||||||
|
func readJPEGOrientation(data []byte) int {
|
||||||
|
if len(data) < 2 || data[0] != 0xFF || data[1] != 0xD8 {
|
||||||
|
return 1 // Not JPEG
|
||||||
|
}
|
||||||
|
|
||||||
|
r := bytes.NewReader(data[2:])
|
||||||
|
for {
|
||||||
|
var marker [2]byte
|
||||||
|
if _, err := r.Read(marker[:]); err != nil || marker[0] != 0xFF {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if marker[1] == 0xE1 { // APP1 (EXIF)
|
||||||
|
var lenBytes [2]byte
|
||||||
|
if _, err := r.Read(lenBytes[:]); err != nil {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
segLen := int(uint16(lenBytes[0])<<8|uint16(lenBytes[1])) - 2
|
||||||
|
if segLen < 14 {
|
||||||
|
r.Seek(int64(segLen), 1)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seg := make([]byte, segLen)
|
||||||
|
if _, err := r.Read(seg); err != nil {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
if string(seg[:4]) == "Exif" && seg[4] == 0 && seg[5] == 0 {
|
||||||
|
return parseTIFFOrientation(seg[6:])
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if marker[1] == 0xD9 || marker[1] == 0xDA {
|
||||||
|
return 1 // EOI or SOS
|
||||||
|
}
|
||||||
|
if marker[1] >= 0xD0 && marker[1] <= 0xD7 {
|
||||||
|
continue // RST markers
|
||||||
|
}
|
||||||
|
|
||||||
|
var lenBytes [2]byte
|
||||||
|
if _, err := r.Read(lenBytes[:]); err != nil {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
segLen := int(uint16(lenBytes[0])<<8|uint16(lenBytes[1])) - 2
|
||||||
|
if segLen > 0 {
|
||||||
|
r.Seek(int64(segLen), 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseTIFFOrientation(tiff []byte) int {
|
||||||
|
if len(tiff) < 8 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
var big bool
|
||||||
|
switch string(tiff[:2]) {
|
||||||
|
case "MM":
|
||||||
|
big = true
|
||||||
|
case "II":
|
||||||
|
big = false
|
||||||
|
default:
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 := func(b []byte) uint16 {
|
||||||
|
if big {
|
||||||
|
return uint16(b[0])<<8 | uint16(b[1])
|
||||||
|
}
|
||||||
|
return uint16(b[1])<<8 | uint16(b[0])
|
||||||
|
}
|
||||||
|
u32 := func(b []byte) uint32 {
|
||||||
|
if big {
|
||||||
|
return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
|
||||||
|
}
|
||||||
|
return uint32(b[3])<<24 | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
if u16(tiff[2:4]) != 42 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
ifdOffset := u32(tiff[4:8])
|
||||||
|
if int(ifdOffset)+2 > len(tiff) {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
numEntries := u16(tiff[ifdOffset : ifdOffset+2])
|
||||||
|
for i := range int(numEntries) {
|
||||||
|
offset := ifdOffset + 2 + uint32(i)*12
|
||||||
|
if int(offset)+12 > len(tiff) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if u16(tiff[offset:offset+2]) == 0x0112 { // Orientation tag
|
||||||
|
o := int(u16(tiff[offset+8 : offset+10]))
|
||||||
|
if o >= 1 && o <= 8 {
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyOrientation(img image.Image, orientation int) image.Image {
|
||||||
|
if orientation <= 1 || orientation > 8 {
|
||||||
|
return img
|
||||||
|
}
|
||||||
|
|
||||||
|
bounds := img.Bounds()
|
||||||
|
w, h := bounds.Dx(), bounds.Dy()
|
||||||
|
|
||||||
|
outW, outH := w, h
|
||||||
|
if orientation >= 5 {
|
||||||
|
outW, outH = h, w
|
||||||
|
}
|
||||||
|
|
||||||
|
out := image.NewRGBA(image.Rect(0, 0, outW, outH))
|
||||||
|
for y := range h {
|
||||||
|
for x := range w {
|
||||||
|
var dx, dy int
|
||||||
|
switch orientation {
|
||||||
|
case 2:
|
||||||
|
dx, dy = w-1-x, y
|
||||||
|
case 3:
|
||||||
|
dx, dy = w-1-x, h-1-y
|
||||||
|
case 4:
|
||||||
|
dx, dy = x, h-1-y
|
||||||
|
case 5:
|
||||||
|
dx, dy = y, x
|
||||||
|
case 6:
|
||||||
|
dx, dy = h-1-y, x
|
||||||
|
case 7:
|
||||||
|
dx, dy = h-1-y, w-1-x
|
||||||
|
case 8:
|
||||||
|
dx, dy = y, w-1-x
|
||||||
|
}
|
||||||
|
out.Set(dx, dy, img.At(x+bounds.Min.X, y+bounds.Min.Y))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|||||||
@@ -175,3 +175,63 @@ func (m *ModelManifest) HasTensorLayers() bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ModelInfo contains metadata about an image generation model.
|
||||||
|
type ModelInfo struct {
|
||||||
|
Architecture string
|
||||||
|
ParameterCount int64
|
||||||
|
Quantization string
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetModelInfo returns metadata about an image generation model.
|
||||||
|
func GetModelInfo(modelName string) (*ModelInfo, error) {
|
||||||
|
manifest, err := LoadManifest(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to load manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
info := &ModelInfo{}
|
||||||
|
|
||||||
|
// Read model_index.json for architecture, parameter count, and quantization
|
||||||
|
if data, err := manifest.ReadConfig("model_index.json"); err == nil {
|
||||||
|
var index struct {
|
||||||
|
Architecture string `json:"architecture"`
|
||||||
|
ParameterCount int64 `json:"parameter_count"`
|
||||||
|
Quantization string `json:"quantization"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(data, &index) == nil {
|
||||||
|
info.Architecture = index.Architecture
|
||||||
|
info.ParameterCount = index.ParameterCount
|
||||||
|
info.Quantization = index.Quantization
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: detect quantization from tensor names if not in config
|
||||||
|
if info.Quantization == "" {
|
||||||
|
for _, layer := range manifest.Manifest.Layers {
|
||||||
|
if strings.HasSuffix(layer.Name, ".weight_scale") {
|
||||||
|
info.Quantization = "FP8"
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if info.Quantization == "" {
|
||||||
|
info.Quantization = "BF16"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: estimate parameter count if not in config
|
||||||
|
if info.ParameterCount == 0 {
|
||||||
|
var totalSize int64
|
||||||
|
for _, layer := range manifest.Manifest.Layers {
|
||||||
|
if layer.MediaType == "application/vnd.ollama.image.tensor" {
|
||||||
|
if !strings.HasSuffix(layer.Name, "_scale") && !strings.HasSuffix(layer.Name, "_qbias") {
|
||||||
|
totalSize += layer.Size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Assume BF16 (2 bytes/param) as rough estimate
|
||||||
|
info.ParameterCount = totalSize / 2
|
||||||
|
}
|
||||||
|
|
||||||
|
return info, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -25,8 +25,7 @@ var SupportedBackends = []string{"metal", "cuda", "cpu"}
|
|||||||
// modelVRAMEstimates maps pipeline class names to their estimated VRAM requirements.
|
// modelVRAMEstimates maps pipeline class names to their estimated VRAM requirements.
|
||||||
var modelVRAMEstimates = map[string]uint64{
|
var modelVRAMEstimates = map[string]uint64{
|
||||||
"ZImagePipeline": 21 * GB, // ~21GB for Z-Image (text encoder + transformer + VAE)
|
"ZImagePipeline": 21 * GB, // ~21GB for Z-Image (text encoder + transformer + VAE)
|
||||||
"FluxPipeline": 21 * GB, // ~21GB for Flux (same architecture)
|
"FluxPipeline": 20 * GB, // ~20GB for Flux
|
||||||
"QwenImagePipeline": 80 * GB, // TODO: verify actual requirements, using conservative estimate for now
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckPlatformSupport validates that image generation is supported on the current platform.
|
// CheckPlatformSupport validates that image generation is supported on the current platform.
|
||||||
@@ -72,31 +71,38 @@ func ResolveModelName(modelName string) string {
|
|||||||
// EstimateVRAM returns the estimated VRAM needed for an image generation model.
|
// EstimateVRAM returns the estimated VRAM needed for an image generation model.
|
||||||
// Returns a conservative default of 21GB if the model type cannot be determined.
|
// Returns a conservative default of 21GB if the model type cannot be determined.
|
||||||
func EstimateVRAM(modelName string) uint64 {
|
func EstimateVRAM(modelName string) uint64 {
|
||||||
manifest, err := LoadManifest(modelName)
|
className := DetectModelType(modelName)
|
||||||
if err != nil {
|
if estimate, ok := modelVRAMEstimates[className]; ok {
|
||||||
return 21 * GB
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := manifest.ReadConfig("model_index.json")
|
|
||||||
if err != nil {
|
|
||||||
return 21 * GB
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse just the class name
|
|
||||||
var index struct {
|
|
||||||
ClassName string `json:"_class_name"`
|
|
||||||
}
|
|
||||||
if err := json.Unmarshal(data, &index); err != nil {
|
|
||||||
return 21 * GB
|
|
||||||
}
|
|
||||||
|
|
||||||
if estimate, ok := modelVRAMEstimates[index.ClassName]; ok {
|
|
||||||
return estimate
|
return estimate
|
||||||
}
|
}
|
||||||
return 21 * GB
|
return 21 * GB
|
||||||
}
|
}
|
||||||
|
|
||||||
// HasTensorLayers checks if the given model has tensor layers.
|
// DetectModelType reads model_index.json and returns the model type.
|
||||||
func HasTensorLayers(modelName string) bool {
|
// Checks both "architecture" (Ollama format) and "_class_name" (diffusers format).
|
||||||
return ResolveModelName(modelName) != ""
|
// Returns empty string if detection fails.
|
||||||
|
func DetectModelType(modelName string) string {
|
||||||
|
manifest, err := LoadManifest(modelName)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := manifest.ReadConfig("model_index.json")
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var index struct {
|
||||||
|
Architecture string `json:"architecture"`
|
||||||
|
ClassName string `json:"_class_name"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &index); err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prefer architecture (Ollama format), fall back to _class_name (diffusers)
|
||||||
|
if index.Architecture != "" {
|
||||||
|
return index.Architecture
|
||||||
|
}
|
||||||
|
return index.ClassName
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -73,8 +73,7 @@ func TestModelVRAMEstimates(t *testing.T) {
|
|||||||
// Verify the VRAM estimates map has expected entries
|
// Verify the VRAM estimates map has expected entries
|
||||||
expected := map[string]uint64{
|
expected := map[string]uint64{
|
||||||
"ZImagePipeline": 21 * GB,
|
"ZImagePipeline": 21 * GB,
|
||||||
"FluxPipeline": 21 * GB,
|
"FluxPipeline": 20 * GB,
|
||||||
"QwenImagePipeline": 80 * GB,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, expectedVRAM := range expected {
|
for name, expectedVRAM := range expected {
|
||||||
@@ -94,13 +93,6 @@ func TestEstimateVRAMDefault(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHasTensorLayers(t *testing.T) {
|
|
||||||
// Non-existent model should return false
|
|
||||||
if HasTensorLayers("nonexistent-model") {
|
|
||||||
t.Error("HasTensorLayers() should return false for non-existent model")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResolveModelName(t *testing.T) {
|
func TestResolveModelName(t *testing.T) {
|
||||||
// Non-existent model should return empty string
|
// Non-existent model should return empty string
|
||||||
result := ResolveModelName("nonexistent-model")
|
result := ResolveModelName("nonexistent-model")
|
||||||
|
|||||||