mirror of
https://github.com/ollama/ollama.git
synced 2026-01-19 04:51:17 -05:00
Compare commits
5 Commits
parth/decr
...
v0.14.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4adb9cf4bb | ||
|
|
74f475e735 | ||
|
|
875cecba74 | ||
|
|
7d411a4686 | ||
|
|
02a2401596 |
@@ -190,7 +190,7 @@ if(MLX_ENGINE)
|
|||||||
install(TARGETS mlx mlxc
|
install(TARGETS mlx mlxc
|
||||||
RUNTIME_DEPENDENCIES
|
RUNTIME_DEPENDENCIES
|
||||||
DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
|
DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
|
||||||
PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc cudnn nccl
|
PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc nvrtc-builtins cudnn nccl openblas gfortran
|
||||||
PRE_EXCLUDE_REGEXES ".*"
|
PRE_EXCLUDE_REGEXES ".*"
|
||||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ More Ollama [Python example](https://github.com/ollama/ollama-python/blob/main/e
|
|||||||
import { Ollama } from "ollama";
|
import { Ollama } from "ollama";
|
||||||
|
|
||||||
const client = new Ollama();
|
const client = new Ollama();
|
||||||
const results = await client.webSearch({ query: "what is ollama?" });
|
const results = await client.webSearch("what is ollama?");
|
||||||
console.log(JSON.stringify(results, null, 2));
|
console.log(JSON.stringify(results, null, 2));
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -213,7 +213,7 @@ models](https://ollama.com/models)\n\nAvailable for macOS, Windows, and Linux',
|
|||||||
import { Ollama } from "ollama";
|
import { Ollama } from "ollama";
|
||||||
|
|
||||||
const client = new Ollama();
|
const client = new Ollama();
|
||||||
const fetchResult = await client.webFetch({ url: "https://ollama.com" });
|
const fetchResult = await client.webFetch("https://ollama.com");
|
||||||
console.log(JSON.stringify(fetchResult, null, 2));
|
console.log(JSON.stringify(fetchResult, null, 2));
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ Please refer to the [GPU docs](./gpu).
|
|||||||
|
|
||||||
## How can I specify the context window size?
|
## How can I specify the context window size?
|
||||||
|
|
||||||
By default, Ollama uses a context window size of 2048 tokens.
|
By default, Ollama uses a context window size of 4096 tokens.
|
||||||
|
|
||||||
This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
|
This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
title: "Linux"
|
title: Linux
|
||||||
---
|
---
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
@@ -13,14 +13,15 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||||||
## Manual install
|
## Manual install
|
||||||
|
|
||||||
<Note>
|
<Note>
|
||||||
If you are upgrading from a prior version, you should remove the old libraries with `sudo rm -rf /usr/lib/ollama` first.
|
If you are upgrading from a prior version, you should remove the old libraries
|
||||||
|
with `sudo rm -rf /usr/lib/ollama` first.
|
||||||
</Note>
|
</Note>
|
||||||
|
|
||||||
Download and extract the package:
|
Download and extract the package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
Start Ollama:
|
Start Ollama:
|
||||||
@@ -40,8 +41,8 @@ ollama -v
|
|||||||
If you have an AMD GPU, also download and extract the additional ROCm package:
|
If you have an AMD GPU, also download and extract the additional ROCm package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
### ARM64 install
|
### ARM64 install
|
||||||
@@ -49,8 +50,8 @@ curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
|
|||||||
Download and extract the ARM64-specific package:
|
Download and extract the ARM64-specific package:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-arm64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-arm64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
### Adding Ollama as a startup service (recommended)
|
### Adding Ollama as a startup service (recommended)
|
||||||
@@ -112,7 +113,11 @@ sudo systemctl status ollama
|
|||||||
```
|
```
|
||||||
|
|
||||||
<Note>
|
<Note>
|
||||||
While AMD has contributed the `amdgpu` driver upstream to the official linux kernel source, the version is older and may not support all ROCm features. We recommend you install the latest driver from https://www.amd.com/en/support/linux-drivers for best support of your Radeon GPU.
|
While AMD has contributed the `amdgpu` driver upstream to the official linux
|
||||||
|
kernel source, the version is older and may not support all ROCm features. We
|
||||||
|
recommend you install the latest driver from
|
||||||
|
https://www.amd.com/en/support/linux-drivers for best support of your Radeon
|
||||||
|
GPU.
|
||||||
</Note>
|
</Note>
|
||||||
|
|
||||||
## Customizing
|
## Customizing
|
||||||
@@ -141,8 +146,8 @@ curl -fsSL https://ollama.com/install.sh | sh
|
|||||||
Or by re-downloading Ollama:
|
Or by re-downloading Ollama:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
|
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
|
||||||
| sudo tar zx -C /usr
|
| sudo tar x -C /usr
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installing specific versions
|
## Installing specific versions
|
||||||
@@ -191,4 +196,4 @@ Remove the downloaded models and Ollama service user and group:
|
|||||||
sudo userdel ollama
|
sudo userdel ollama
|
||||||
sudo groupdel ollama
|
sudo groupdel ollama
|
||||||
sudo rm -r /usr/share/ollama
|
sudo rm -r /usr/share/ollama
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ _build_macapp() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
rm -f dist/Ollama-darwin.zip
|
rm -f dist/Ollama-darwin.zip
|
||||||
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
||||||
(cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
|
(cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
|
||||||
|
|
||||||
# Notarize and Staple
|
# Notarize and Staple
|
||||||
@@ -187,7 +187,7 @@ _build_macapp() {
|
|||||||
$(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
|
$(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
|
||||||
rm -f dist/Ollama-darwin.zip
|
rm -f dist/Ollama-darwin.zip
|
||||||
$(xcrun -f stapler) staple dist/Ollama.app
|
$(xcrun -f stapler) staple dist/Ollama.app
|
||||||
ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip
|
||||||
|
|
||||||
rm -f dist/Ollama.dmg
|
rm -f dist/Ollama.dmg
|
||||||
|
|
||||||
|
|||||||
@@ -95,48 +95,11 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// numDownloadParts is the default number of concurrent download parts for standard downloads
|
numDownloadParts = 16
|
||||||
numDownloadParts = 16
|
|
||||||
// numHFDownloadParts is the reduced number of concurrent download parts for HuggingFace
|
|
||||||
// downloads to avoid triggering rate limits (HTTP 429 errors). See GitHub issue #13297.
|
|
||||||
numHFDownloadParts = 4
|
|
||||||
minDownloadPartSize int64 = 100 * format.MegaByte
|
minDownloadPartSize int64 = 100 * format.MegaByte
|
||||||
maxDownloadPartSize int64 = 1000 * format.MegaByte
|
maxDownloadPartSize int64 = 1000 * format.MegaByte
|
||||||
)
|
)
|
||||||
|
|
||||||
// isHuggingFaceURL returns true if the URL is from a HuggingFace domain.
|
|
||||||
// This includes:
|
|
||||||
// - huggingface.co (main domain)
|
|
||||||
// - *.huggingface.co (subdomains like cdn-lfs.huggingface.co)
|
|
||||||
// - hf.co (shortlink domain)
|
|
||||||
// - *.hf.co (CDN domains like cdn-lfs.hf.co, cdn-lfs3.hf.co)
|
|
||||||
func isHuggingFaceURL(u *url.URL) bool {
|
|
||||||
if u == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
host := strings.ToLower(u.Hostname())
|
|
||||||
return host == "huggingface.co" ||
|
|
||||||
strings.HasSuffix(host, ".huggingface.co") ||
|
|
||||||
host == "hf.co" ||
|
|
||||||
strings.HasSuffix(host, ".hf.co")
|
|
||||||
}
|
|
||||||
|
|
||||||
// getNumDownloadParts returns the number of concurrent download parts to use
|
|
||||||
// for the given URL. HuggingFace URLs use reduced concurrency (default 4) to
|
|
||||||
// avoid triggering rate limits. This can be overridden via the OLLAMA_HF_CONCURRENCY
|
|
||||||
// environment variable. For non-HuggingFace URLs, returns the standard concurrency (16).
|
|
||||||
func getNumDownloadParts(u *url.URL) int {
|
|
||||||
if isHuggingFaceURL(u) {
|
|
||||||
if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
|
|
||||||
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return numHFDownloadParts
|
|
||||||
}
|
|
||||||
return numDownloadParts
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *blobDownloadPart) Name() string {
|
func (p *blobDownloadPart) Name() string {
|
||||||
return strings.Join([]string{
|
return strings.Join([]string{
|
||||||
p.blobDownload.Name, "partial", strconv.Itoa(p.N),
|
p.blobDownload.Name, "partial", strconv.Itoa(p.N),
|
||||||
@@ -308,11 +271,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
|
|||||||
}
|
}
|
||||||
|
|
||||||
g, inner := errgroup.WithContext(ctx)
|
g, inner := errgroup.WithContext(ctx)
|
||||||
concurrency := getNumDownloadParts(directURL)
|
g.SetLimit(numDownloadParts)
|
||||||
if concurrency != numDownloadParts {
|
|
||||||
slog.Info(fmt.Sprintf("using reduced concurrency (%d) for HuggingFace download", concurrency))
|
|
||||||
}
|
|
||||||
g.SetLimit(concurrency)
|
|
||||||
for i := range b.Parts {
|
for i := range b.Parts {
|
||||||
part := b.Parts[i]
|
part := b.Parts[i]
|
||||||
if part.Completed.Load() == part.Size {
|
if part.Completed.Load() == part.Size {
|
||||||
|
|||||||
@@ -1,194 +0,0 @@
|
|||||||
package server
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/url"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestIsHuggingFaceURL(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
url string
|
|
||||||
expected bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "nil url",
|
|
||||||
url: "",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface.co main domain",
|
|
||||||
url: "https://huggingface.co/some/model",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "cdn-lfs.huggingface.co subdomain",
|
|
||||||
url: "https://cdn-lfs.huggingface.co/repos/abc/123",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "cdn-lfs3.hf.co CDN domain",
|
|
||||||
url: "https://cdn-lfs3.hf.co/repos/abc/123",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "hf.co shortlink domain",
|
|
||||||
url: "https://hf.co/model",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "uppercase HuggingFace domain",
|
|
||||||
url: "https://HUGGINGFACE.CO/model",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "mixed case HF domain",
|
|
||||||
url: "https://Cdn-Lfs.HF.Co/repos",
|
|
||||||
expected: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "ollama registry",
|
|
||||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "github.com",
|
|
||||||
url: "https://github.com/ollama/ollama",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "fake huggingface domain",
|
|
||||||
url: "https://nothuggingface.co/model",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "fake hf domain",
|
|
||||||
url: "https://nothf.co/model",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface in path not host",
|
|
||||||
url: "https://example.com/huggingface.co/model",
|
|
||||||
expected: false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
|
||||||
var u *url.URL
|
|
||||||
if tc.url != "" {
|
|
||||||
var err error
|
|
||||||
u, err = url.Parse(tc.url)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse URL: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
got := isHuggingFaceURL(u)
|
|
||||||
assert.Equal(t, tc.expected, got)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetNumDownloadParts(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
url string
|
|
||||||
envValue string
|
|
||||||
expected int
|
|
||||||
description string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "nil url returns default",
|
|
||||||
url: "",
|
|
||||||
envValue: "",
|
|
||||||
expected: numDownloadParts,
|
|
||||||
description: "nil URL should return standard concurrency",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "ollama registry returns default",
|
|
||||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
|
||||||
envValue: "",
|
|
||||||
expected: numDownloadParts,
|
|
||||||
description: "Ollama registry should use standard concurrency",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface returns reduced default",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "",
|
|
||||||
expected: numHFDownloadParts,
|
|
||||||
description: "HuggingFace should use reduced concurrency",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "hf.co CDN returns reduced default",
|
|
||||||
url: "https://cdn-lfs3.hf.co/repos/abc/123",
|
|
||||||
envValue: "",
|
|
||||||
expected: numHFDownloadParts,
|
|
||||||
description: "HuggingFace CDN should use reduced concurrency",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface with env override",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "2",
|
|
||||||
expected: 2,
|
|
||||||
description: "OLLAMA_HF_CONCURRENCY should override default",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface with higher env override",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "8",
|
|
||||||
expected: 8,
|
|
||||||
description: "OLLAMA_HF_CONCURRENCY can be set higher than default",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface with invalid env (non-numeric)",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "invalid",
|
|
||||||
expected: numHFDownloadParts,
|
|
||||||
description: "Invalid OLLAMA_HF_CONCURRENCY should fall back to default",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface with invalid env (zero)",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "0",
|
|
||||||
expected: numHFDownloadParts,
|
|
||||||
description: "Zero OLLAMA_HF_CONCURRENCY should fall back to default",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "huggingface with invalid env (negative)",
|
|
||||||
url: "https://huggingface.co/model/repo",
|
|
||||||
envValue: "-1",
|
|
||||||
expected: numHFDownloadParts,
|
|
||||||
description: "Negative OLLAMA_HF_CONCURRENCY should fall back to default",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "non-huggingface ignores env",
|
|
||||||
url: "https://registry.ollama.ai/v2/library/llama3",
|
|
||||||
envValue: "2",
|
|
||||||
expected: numDownloadParts,
|
|
||||||
description: "OLLAMA_HF_CONCURRENCY should not affect non-HF URLs",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
|
||||||
// Set or clear the environment variable
|
|
||||||
if tc.envValue != "" {
|
|
||||||
t.Setenv("OLLAMA_HF_CONCURRENCY", tc.envValue)
|
|
||||||
}
|
|
||||||
|
|
||||||
var u *url.URL
|
|
||||||
if tc.url != "" {
|
|
||||||
var err error
|
|
||||||
u, err = url.Parse(tc.url)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to parse URL: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
got := getNumDownloadParts(u)
|
|
||||||
assert.Equal(t, tc.expected, got, tc.description)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user