mirror of
https://github.com/ollama/ollama.git
synced 2026-01-19 04:51:17 -05:00
Compare commits
5 Commits
v0.14.0-rc
...
parth/decr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6b2abfb433 | ||
|
|
805ed4644c | ||
|
|
e4b488a7b5 | ||
|
|
98079ddd79 | ||
|
|
d70942f47b |
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -372,13 +372,17 @@ jobs:
|
||||
outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
cache-to: type=inline
|
||||
- name: Deduplicate CUDA libraries
|
||||
run: |
|
||||
./scripts/deduplicate_cuda_libs.sh dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
- run: |
|
||||
for COMPONENT in bin/* lib/ollama/*; do
|
||||
case "$COMPONENT" in
|
||||
bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
bin/ollama*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/*.so*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/mlx*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
|
||||
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
|
||||
lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
|
||||
|
||||
@@ -48,53 +48,12 @@ if echo $PLATFORM | grep "amd64" > /dev/null; then
|
||||
.
|
||||
fi
|
||||
|
||||
# Deduplicate CUDA libraries across mlx_* and cuda_* directories
#
# For every <base_dir>/lib/ollama/mlx_cuda_<ver> directory that has a sibling
# cuda_<ver> directory, each regular *.so* file located directly inside the
# mlx directory that is byte-identical to the same-named file in the cuda
# directory is replaced by the relative symlink ../cuda_<ver>/<name>.
#
# Arguments: $1 - base directory that contains lib/ollama
# Outputs:   progress messages on stdout
deduplicate_cuda_libs() {
    local base_dir="$1"
    local mlx_dir mlx_name cuda_version cuda_dir mlx_file filename cuda_file

    echo "Deduplicating CUDA libraries in ${base_dir}..."

    # Find all mlx_cuda_* directories
    for mlx_dir in "${base_dir}"/lib/ollama/mlx_cuda_*; do
        [ -d "${mlx_dir}" ] || continue

        # Extract CUDA version (e.g., v12, v13)
        mlx_name="${mlx_dir##*/}"
        cuda_version="${mlx_name#mlx_cuda_}"
        cuda_dir="${base_dir}/lib/ollama/cuda_${cuda_version}"

        # Skip if corresponding cuda_* directory doesn't exist
        [ -d "${cuda_dir}" ] || continue

        echo " Checking ${mlx_dir} against ${cuda_dir}..."

        # Only direct children are considered: the symlink target below is
        # relative to the mlx directory itself, so a file found in a nested
        # subdirectory would end up as a dangling link.
        for mlx_file in "${mlx_dir}"/*.so*; do
            # Regular files only; this skips an unmatched glob, directories,
            # and symlinks (including ones created by a previous run).
            if [ ! -f "${mlx_file}" ] || [ -L "${mlx_file}" ]; then
                continue
            fi

            filename="${mlx_file##*/}"
            cuda_file="${cuda_dir}/${filename}"

            # Skip if file doesn't exist in cuda directory
            [ -f "${cuda_file}" ] || continue

            # Byte-wise comparison. Unlike comparing two checksum strings, a
            # failing comparison tool can never make different files look
            # identical: any non-zero status leaves the file untouched.
            if cmp -s "${mlx_file}" "${cuda_file}"; then
                echo " Deduplicating ${filename}"
                ln -sf "../cuda_${cuda_version}/${filename}" "${mlx_file}"
            fi
        done
    done
}
|
||||
|
||||
# Run deduplication for each platform output directory
|
||||
if echo $PLATFORM | grep "," > /dev/null ; then
|
||||
deduplicate_cuda_libs "./dist/linux_amd64"
|
||||
deduplicate_cuda_libs "./dist/linux_arm64"
|
||||
$(dirname $0)/deduplicate_cuda_libs.sh "./dist/linux_amd64"
|
||||
$(dirname $0)/deduplicate_cuda_libs.sh "./dist/linux_arm64"
|
||||
elif echo $PLATFORM | grep "amd64\|arm64" > /dev/null ; then
|
||||
deduplicate_cuda_libs "./dist"
|
||||
$(dirname $0)/deduplicate_cuda_libs.sh "./dist"
|
||||
fi
|
||||
|
||||
# buildx behavior changes for single vs. multiplatform
|
||||
|
||||
60
scripts/deduplicate_cuda_libs.sh
Executable file
60
scripts/deduplicate_cuda_libs.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/sh
#
# Deduplicate CUDA libraries across mlx_* and cuda_* directories
# This script finds identical .so* files in mlx_cuda_* directories that exist
# in corresponding cuda_* directories and replaces them with symlinks.
#
# Usage: deduplicate_cuda_libs.sh <base_directory>
#   <base_directory> must contain lib/ollama/{mlx_cuda_<ver>,cuda_<ver>}.
#

set -eu

if [ $# -eq 0 ]; then
    echo "ERROR: No directory specified" >&2
    echo "Usage: $0 <base_directory>" >&2
    exit 1
fi

base_dir="$1"

if [ ! -d "${base_dir}" ]; then
    echo "ERROR: Directory ${base_dir} does not exist" >&2
    exit 1
fi

echo "Deduplicating CUDA libraries in ${base_dir}..."

# Find all mlx_cuda_* directories
for mlx_dir in "${base_dir}"/lib/ollama/mlx_cuda_*; do
    [ -d "${mlx_dir}" ] || continue

    # Extract CUDA version (e.g., v12, v13)
    mlx_name="${mlx_dir##*/}"
    cuda_version="${mlx_name#mlx_cuda_}"
    cuda_dir="${base_dir}/lib/ollama/cuda_${cuda_version}"

    # Skip if corresponding cuda_* directory doesn't exist
    [ -d "${cuda_dir}" ] || continue

    echo " Checking ${mlx_dir} against ${cuda_dir}..."

    # Only direct children are considered: the symlink target below is
    # relative to the mlx directory itself, so a file found in a nested
    # subdirectory would end up as a dangling link.
    for mlx_file in "${mlx_dir}"/*.so*; do
        # Regular files only; this skips an unmatched glob, directories,
        # and symlinks (including ones created by a previous run).
        if [ ! -f "${mlx_file}" ] || [ -L "${mlx_file}" ]; then
            continue
        fi

        filename="${mlx_file##*/}"
        cuda_file="${cuda_dir}/${filename}"

        # Skip if file doesn't exist in cuda directory
        [ -f "${cuda_file}" ] || continue

        # Byte-wise comparison. Unlike comparing two checksum strings, a
        # failing comparison tool can never make different files look
        # identical: any non-zero status leaves the file untouched.
        if cmp -s "${mlx_file}" "${cuda_file}"; then
            echo " Deduplicating ${filename}"
            ln -sf "../cuda_${cuda_version}/${filename}" "${mlx_file}"
        fi
    done
done

echo "Deduplication complete"
|
||||
@@ -95,11 +95,48 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
|
||||
}
|
||||
|
||||
const (
|
||||
numDownloadParts = 16
|
||||
// numDownloadParts is the default number of concurrent download parts for standard downloads
|
||||
numDownloadParts = 16
|
||||
// numHFDownloadParts is the reduced number of concurrent download parts for HuggingFace
|
||||
// downloads to avoid triggering rate limits (HTTP 429 errors). See GitHub issue #13297.
|
||||
numHFDownloadParts = 4
|
||||
minDownloadPartSize int64 = 100 * format.MegaByte
|
||||
maxDownloadPartSize int64 = 1000 * format.MegaByte
|
||||
)
|
||||
|
||||
// isHuggingFaceURL returns true if the URL is from a HuggingFace domain.
|
||||
// This includes:
|
||||
// - huggingface.co (main domain)
|
||||
// - *.huggingface.co (subdomains like cdn-lfs.huggingface.co)
|
||||
// - hf.co (shortlink domain)
|
||||
// - *.hf.co (CDN domains like cdn-lfs.hf.co, cdn-lfs3.hf.co)
|
||||
func isHuggingFaceURL(u *url.URL) bool {
|
||||
if u == nil {
|
||||
return false
|
||||
}
|
||||
host := strings.ToLower(u.Hostname())
|
||||
return host == "huggingface.co" ||
|
||||
strings.HasSuffix(host, ".huggingface.co") ||
|
||||
host == "hf.co" ||
|
||||
strings.HasSuffix(host, ".hf.co")
|
||||
}
|
||||
|
||||
// getNumDownloadParts returns the number of concurrent download parts to use
|
||||
// for the given URL. HuggingFace URLs use reduced concurrency (default 4) to
|
||||
// avoid triggering rate limits. This can be overridden via the OLLAMA_HF_CONCURRENCY
|
||||
// environment variable. For non-HuggingFace URLs, returns the standard concurrency (16).
|
||||
func getNumDownloadParts(u *url.URL) int {
|
||||
if isHuggingFaceURL(u) {
|
||||
if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return numHFDownloadParts
|
||||
}
|
||||
return numDownloadParts
|
||||
}
|
||||
|
||||
func (p *blobDownloadPart) Name() string {
|
||||
return strings.Join([]string{
|
||||
p.blobDownload.Name, "partial", strconv.Itoa(p.N),
|
||||
@@ -271,7 +308,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
||||
}
|
||||
|
||||
g, inner := errgroup.WithContext(ctx)
|
||||
g.SetLimit(numDownloadParts)
|
||||
concurrency := getNumDownloadParts(directURL)
|
||||
if concurrency != numDownloadParts {
|
||||
slog.Info(fmt.Sprintf("using reduced concurrency (%d) for HuggingFace download", concurrency))
|
||||
}
|
||||
g.SetLimit(concurrency)
|
||||
for i := range b.Parts {
|
||||
part := b.Parts[i]
|
||||
if part.Completed.Load() == part.Size {
|
||||
|
||||
194
server/download_test.go
Normal file
194
server/download_test.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIsHuggingFaceURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "nil url",
|
||||
url: "",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "huggingface.co main domain",
|
||||
url: "https://huggingface.co/some/model",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "cdn-lfs.huggingface.co subdomain",
|
||||
url: "https://cdn-lfs.huggingface.co/repos/abc/123",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "cdn-lfs3.hf.co CDN domain",
|
||||
url: "https://cdn-lfs3.hf.co/repos/abc/123",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "hf.co shortlink domain",
|
||||
url: "https://hf.co/model",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "uppercase HuggingFace domain",
|
||||
url: "https://HUGGINGFACE.CO/model",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "mixed case HF domain",
|
||||
url: "https://Cdn-Lfs.HF.Co/repos",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "ollama registry",
|
||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "github.com",
|
||||
url: "https://github.com/ollama/ollama",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "fake huggingface domain",
|
||||
url: "https://nothuggingface.co/model",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "fake hf domain",
|
||||
url: "https://nothf.co/model",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "huggingface in path not host",
|
||||
url: "https://example.com/huggingface.co/model",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var u *url.URL
|
||||
if tc.url != "" {
|
||||
var err error
|
||||
u, err = url.Parse(tc.url)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse URL: %v", err)
|
||||
}
|
||||
}
|
||||
got := isHuggingFaceURL(u)
|
||||
assert.Equal(t, tc.expected, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetNumDownloadParts(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
envValue string
|
||||
expected int
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "nil url returns default",
|
||||
url: "",
|
||||
envValue: "",
|
||||
expected: numDownloadParts,
|
||||
description: "nil URL should return standard concurrency",
|
||||
},
|
||||
{
|
||||
name: "ollama registry returns default",
|
||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
||||
envValue: "",
|
||||
expected: numDownloadParts,
|
||||
description: "Ollama registry should use standard concurrency",
|
||||
},
|
||||
{
|
||||
name: "huggingface returns reduced default",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "",
|
||||
expected: numHFDownloadParts,
|
||||
description: "HuggingFace should use reduced concurrency",
|
||||
},
|
||||
{
|
||||
name: "hf.co CDN returns reduced default",
|
||||
url: "https://cdn-lfs3.hf.co/repos/abc/123",
|
||||
envValue: "",
|
||||
expected: numHFDownloadParts,
|
||||
description: "HuggingFace CDN should use reduced concurrency",
|
||||
},
|
||||
{
|
||||
name: "huggingface with env override",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "2",
|
||||
expected: 2,
|
||||
description: "OLLAMA_HF_CONCURRENCY should override default",
|
||||
},
|
||||
{
|
||||
name: "huggingface with higher env override",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "8",
|
||||
expected: 8,
|
||||
description: "OLLAMA_HF_CONCURRENCY can be set higher than default",
|
||||
},
|
||||
{
|
||||
name: "huggingface with invalid env (non-numeric)",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "invalid",
|
||||
expected: numHFDownloadParts,
|
||||
description: "Invalid OLLAMA_HF_CONCURRENCY should fall back to default",
|
||||
},
|
||||
{
|
||||
name: "huggingface with invalid env (zero)",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "0",
|
||||
expected: numHFDownloadParts,
|
||||
description: "Zero OLLAMA_HF_CONCURRENCY should fall back to default",
|
||||
},
|
||||
{
|
||||
name: "huggingface with invalid env (negative)",
|
||||
url: "https://huggingface.co/model/repo",
|
||||
envValue: "-1",
|
||||
expected: numHFDownloadParts,
|
||||
description: "Negative OLLAMA_HF_CONCURRENCY should fall back to default",
|
||||
},
|
||||
{
|
||||
name: "non-huggingface ignores env",
|
||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
||||
envValue: "2",
|
||||
expected: numDownloadParts,
|
||||
description: "OLLAMA_HF_CONCURRENCY should not affect non-HF URLs",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Set or clear the environment variable
|
||||
if tc.envValue != "" {
|
||||
t.Setenv("OLLAMA_HF_CONCURRENCY", tc.envValue)
|
||||
}
|
||||
|
||||
var u *url.URL
|
||||
if tc.url != "" {
|
||||
var err error
|
||||
u, err = url.Parse(tc.url)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse URL: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
got := getNumDownloadParts(u)
|
||||
assert.Equal(t, tc.expected, got, tc.description)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -123,11 +123,6 @@ func RegisterFlags(cmd *cobra.Command) {
|
||||
// Returns true if it handled the request, false if the caller should continue with normal flow.
|
||||
// Supports flags: --width, --height, --steps, --seed, --negative
|
||||
func RunCLI(cmd *cobra.Command, name string, prompt string, interactive bool, keepAlive *api.Duration) error {
|
||||
// Verify it's a valid image gen model
|
||||
if ResolveModelName(name) == "" {
|
||||
return fmt.Errorf("unknown image generation model: %s", name)
|
||||
}
|
||||
|
||||
// Get options from flags (with env var defaults)
|
||||
opts := DefaultOptions()
|
||||
if cmd != nil && cmd.Flags() != nil {
|
||||
@@ -511,10 +506,7 @@ func displayImageInTerminal(imagePath string) bool {
|
||||
// Send in chunks for large images
|
||||
const chunkSize = 4096
|
||||
for i := 0; i < len(encoded); i += chunkSize {
|
||||
end := i + chunkSize
|
||||
if end > len(encoded) {
|
||||
end = len(encoded)
|
||||
}
|
||||
end := min(i+chunkSize, len(encoded))
|
||||
chunk := encoded[i:end]
|
||||
|
||||
if i == 0 {
|
||||
|
||||
Reference in New Issue
Block a user