scripts: skip macOS symlink creation if already correct

scripts: use ollama.com/download for macOS with OLLAMA_VERSION support
build: fix Dockerfile mlx directory (#14131)
2026-02-07 22:24:05 -05:00 · 2026-02-07 14:09:00 -08:00 · 2026-02-07 13:56:08 -08:00 · 2026-02-06 17:08:53 -08:00 · 2026-02-06 15:49:39 -08:00 · 2026-02-06 15:33:18 -08:00
4 changed files with 9 additions and 17 deletions
--- a/Dockerfile
+++ b/Dockerfile
@@ -147,7 +147,7 @@ ARG PARALLEL
 WORKDIR /go/src/github.com/ollama/ollama
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-COPY x/ml/backend/mlx x/ml/backend/mlx
+COPY x/imagegen/mlx x/imagegen/mlx
 COPY go.mod go.sum .
 COPY MLX_VERSION .
 RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
--- a/anthropic/anthropic.go
+++ b/anthropic/anthropic.go
@@ -897,11 +897,5 @@ func countContentBlock(block any) int {
 		}
 	}

-	if source, ok := blockMap["source"].(map[string]any); ok {
-		if data, ok := source["data"].(string); ok {
-			total += len(data)
-		}
-	}
-
 	return total
 }
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -312,7 +312,7 @@ Parallel request processing for a given model results in increasing the context
 The following server settings may be used to adjust how Ollama handles concurrent requests on most platforms:

 - `OLLAMA_MAX_LOADED_MODELS` - The maximum number of models that can be loaded concurrently provided they fit in available memory. The default is 3 \* the number of GPUs or 3 for CPU inference.
- `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time. The default will auto-select either 4 or 1 based on available memory.
+- `OLLAMA_NUM_PARALLEL` - The maximum number of parallel requests each model will process at the same time, default 1.  Required RAM will scale by `OLLAMA_NUM_PARALLEL` * `OLLAMA_CONTEXT_LENGTH`.
 - `OLLAMA_MAX_QUEUE` - The maximum number of requests Ollama will queue when busy before rejecting additional requests. The default is 512

 Note: Windows with Radeon GPUs currently default to 1 model maximum due to limitations in ROCm v5.7 for available VRAM reporting. Once ROCm v6.2 is available, Windows Radeon will follow the defaults above. You may enable concurrent model loads on Radeon on Windows, but ensure you don't load more models than will fit into your GPUs VRAM.
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -49,11 +49,7 @@ if [ "$OS" = "Darwin" ]; then
        exit 1
    fi

-    if [ -n "${OLLAMA_VERSION:-}" ]; then
-        DOWNLOAD_URL="https://github.com/ollama/ollama/releases/download/${OLLAMA_VERSION}/Ollama-darwin.zip"
-    else
-        DOWNLOAD_URL="https://github.com/ollama/ollama/releases/latest/download/Ollama-darwin.zip"
-    fi
+    DOWNLOAD_URL="https://ollama.com/download/Ollama-darwin.zip${OLLAMA_VERSION:+?version=$OLLAMA_VERSION}"

    if pgrep -x Ollama >/dev/null 2>&1; then
        status "Stopping running Ollama instance..."
@@ -74,10 +70,12 @@ if [ "$OS" = "Darwin" ]; then
    unzip -q "$TEMP_DIR/Ollama-darwin.zip" -d "$TEMP_DIR"
    mv "$TEMP_DIR/Ollama.app" "/Applications/"

-    status "Adding 'ollama' command to PATH (may require password)..."
-    mkdir -p "/usr/local/bin" 2>/dev/null || sudo mkdir -p "/usr/local/bin"
-    ln -sf "/Applications/Ollama.app/Contents/Resources/ollama" "/usr/local/bin/ollama" 2>/dev/null || \
-        sudo ln -sf "/Applications/Ollama.app/Contents/Resources/ollama" "/usr/local/bin/ollama"
+    if [ ! -L "/usr/local/bin/ollama" ] || [ "$(readlink "/usr/local/bin/ollama")" != "/Applications/Ollama.app/Contents/Resources/ollama" ]; then
+        status "Adding 'ollama' command to PATH (may require password)..."
+        mkdir -p "/usr/local/bin" 2>/dev/null || sudo mkdir -p "/usr/local/bin"
+        ln -sf "/Applications/Ollama.app/Contents/Resources/ollama" "/usr/local/bin/ollama" 2>/dev/null || \
+            sudo ln -sf "/Applications/Ollama.app/Contents/Resources/ollama" "/usr/local/bin/ollama"
+    fi

    if [ -z "${OLLAMA_NO_START:-}" ]; then
        status "Starting Ollama..."
Author	SHA1	Message	Date
jmorganca	b80dc2668b	scripts: skip macOS symlink creation if already correct	2026-02-07 14:09:00 -08:00
jmorganca	94a6c871f6	scripts: use ollama.com/download for macOS with OLLAMA_VERSION support	2026-02-07 13:56:08 -08:00
Jeffrey Morgan	099a0f18ef	build: fix Dockerfile mlx directory (#14131)	2026-02-06 17:08:53 -08:00
Richard Lyons	fff696ee31	docs: increased RAM requirement for parallelism	2026-02-06 15:49:39 -08:00
Jeffrey Morgan	2e3ce6eab3	anthropic: do not count image tokens for now (#14127)	2026-02-06 15:33:18 -08:00