Mirror of https://github.com/ollama/ollama.git

Compare commits: v0.1.40 ... jyan/forma (5 commits)

| Author | SHA1 | Date |
|---|---|---|
|  | 5dc5a295bf |  |
|  | e21e6b2a33 |  |
|  | a240ea3367 |  |
|  | d4a86102fd |  |
|  | 476fb8e892 |  |
```diff
@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama2"
+write-host "`tollama run llama3"
 write-host ""
```
```diff
@@ -2,32 +2,41 @@ package format
 
 import (
 	"fmt"
-	"math"
 )
 
 const (
 	Thousand = 1000
 	Million  = Thousand * 1000
 	Billion  = Million * 1000
+	Trillion = Billion * 1000
 )
 
 func HumanNumber(b uint64) string {
 	switch {
+	case b >= Trillion:
+		number := float64(b) / Trillion
+		return fmt.Sprintf("%sT", DecimalPlace(number))
 	case b >= Billion:
 		number := float64(b) / Billion
-		if number == math.Floor(number) {
-			return fmt.Sprintf("%.0fB", number) // no decimals if whole number
-		}
-		return fmt.Sprintf("%.1fB", number) // one decimal if not a whole number
+		return fmt.Sprintf("%sB", DecimalPlace(number))
 	case b >= Million:
 		number := float64(b) / Million
-		if number == math.Floor(number) {
-			return fmt.Sprintf("%.0fM", number) // no decimals if whole number
-		}
-		return fmt.Sprintf("%.2fM", number) // two decimals if not a whole number
+		return fmt.Sprintf("%sM", DecimalPlace(number))
 	case b >= Thousand:
-		return fmt.Sprintf("%.0fK", float64(b)/Thousand)
+		number := float64(b) / Thousand
+		return fmt.Sprintf("%sK", DecimalPlace(number))
 	default:
 		return fmt.Sprintf("%d", b)
 	}
 }
+
+func DecimalPlace(number float64) string {
+	switch {
+	case number >= 100:
+		return fmt.Sprintf("%.0f", number)
+	case number >= 10:
+		return fmt.Sprintf("%.1f", number)
+	default:
+		return fmt.Sprintf("%.2f", number)
+	}
+}
```
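Under the new scheme every branch defers to DecimalPlace, so values of 100 or more in a given unit get no decimals, values from 10 to 99 get one, and anything smaller gets two; an even thousand therefore prints as "1.00K" where the old K branch printed "1K". A minimal sketch of the resulting output, assuming it is compiled against this branch (the github.com/ollama/ollama/format import path is the same one gpu/gpu.go uses):

```go
package main

import (
	"fmt"

	"github.com/ollama/ollama/format"
)

func main() {
	// Sample sizes spanning each branch of the new HumanNumber.
	for _, n := range []uint64{999, 1000, 1000000, 500500000, 2850000000, 1000000000000} {
		fmt.Printf("%d -> %s\n", n, format.HumanNumber(n))
	}
	// Expected, per the diff above:
	//   999 -> 999
	//   1000 -> 1.00K
	//   1000000 -> 1.00M
	//   500500000 -> 500M
	//   2850000000 -> 2.85B
	//   1000000000000 -> 1.00T
}
```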
```diff
@@ -13,14 +13,15 @@ func TestHumanNumber(t *testing.T) {
 
 	testCases := []testCase{
 		{0, "0"},
-		{1000000, "1M"},
+		{1000000, "1.00M"},
 		{125000000, "125M"},
-		{500500000, "500.50M"},
-		{500550000, "500.55M"},
-		{1000000000, "1B"},
-		{2800000000, "2.8B"},
-		{2850000000, "2.9B"},
-		{1000000000000, "1000B"},
+		{500500000, "500M"},
+		{500550000, "501M"},
+		{1000000000, "1.00B"},
+		{2800000000, "2.80B"},
+		{2850000000, "2.85B"},
+		{28550000000, "28.6B"},
+		{1000000000000, "1.00T"},
 	}
 
 	for _, tc := range testCases {
```
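Two of the new expectations look odd side by side: 500500000 becomes "500M" while 500550000 becomes "501M". That is a property of the fmt verbs rather than of HumanNumber: Go formats floats with round-to-nearest, ties-to-even, and 500.5 is an exact tie while 500.55 is not. A self-contained check of just the standard-library behavior these expectations rely on:

```go
package main

import "fmt"

func main() {
	// 500.5 is exactly representable and sits exactly halfway,
	// so %.0f rounds to the even neighbour: 500, hence "500M" above.
	fmt.Printf("%.0f\n", 500.5) // 500

	// 500.55 is not a tie; it rounds up to 501, hence "501M" above.
	fmt.Printf("%.0f\n", 500.55) // 501

	// Values below 10 always get two decimals, so exact powers
	// print with trailing zeros: "1.00B", "1.00T".
	fmt.Printf("%.2f\n", 1.0) // 1.00
}
```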
gpu/gpu.go (32 changed lines)
```diff
@@ -16,13 +16,12 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
 	"strconv"
 	"strings"
 	"sync"
 	"unsafe"
 
-	"github.com/ollama/ollama/format"
-
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/format"
 )
 
 type handles struct {
```
```diff
@@ -105,8 +104,6 @@ func initGPUHandles() *handles {
 	var cudartMgmtPatterns []string
 	var nvcudaMgmtName string
 	var nvcudaMgmtPatterns []string
-	var oneapiMgmtName string
-	var oneapiMgmtPatterns []string
 
 	tmpDir, _ := PayloadsDir()
 	switch runtime.GOOS {
```
```diff
@@ -118,8 +115,6 @@ func initGPUHandles() *handles {
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "nvcuda.dll"
 		nvcudaMgmtPatterns = NvcudaWindowsGlobs
-		oneapiMgmtName = "ze_intel_gpu64.dll"
-		oneapiMgmtPatterns = OneapiWindowsGlobs
 	case "linux":
 		cudartMgmtName = "libcudart.so*"
 		if tmpDir != "" {
```
```diff
@@ -130,8 +125,6 @@ func initGPUHandles() *handles {
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "libcuda.so*"
 		nvcudaMgmtPatterns = NvcudaLinuxGlobs
-		oneapiMgmtName = "libze_intel_gpu.so"
-		oneapiMgmtPatterns = OneapiLinuxGlobs
 	default:
 		return gpuHandles
 	}
```
```diff
@@ -159,17 +152,6 @@ func initGPUHandles() *handles {
 		}
 	}
 
-	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
-	if len(oneapiLibPaths) > 0 {
-		deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths)
-		if oneapi != nil {
-			slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
-			gpuHandles.oneapi = oneapi
-			gpuHandles.deviceCount = deviceCount
-			return gpuHandles
-		}
-	}
-
 	return gpuHandles
 }
```
```diff
@@ -245,18 +227,6 @@ func GetGPUInfo() GpuInfoList {
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
 		}
-		if gpuHandles.oneapi != nil {
-			gpuInfo := GpuInfo{
-				Library: "oneapi",
-			}
-			C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
-			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-			memInfo.free = C.uint64_t(totalFreeMem)
-			gpuInfo.TotalMemory = uint64(memInfo.total)
-			gpuInfo.FreeMemory = uint64(memInfo.free)
-			gpuInfo.ID = strconv.Itoa(i)
-			resp = append(resp, gpuInfo)
-		}
 	}
 
 	// Then AMD
```
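The only arithmetic in the removed GetGPUInfo block is the free-VRAM work-around: the reported free memory is scaled down by 5% before being stored, leaving headroom for the MKL library used by the ggml-sycl backend. A standalone sketch of that calculation; the 0.95 factor and the intent come from the removed lines above, while the variable names and sample value are illustrative:

```go
package main

import "fmt"

func main() {
	// Free VRAM as reported by the management library (illustrative value).
	var reportedFree uint64 = 8 << 30 // 8 GiB

	// Work-around from the removed block: keep roughly 5% in reserve for the
	// MKL library used by the ggml-sycl backend.
	usableFree := uint64(float64(reportedFree) * 0.95)

	fmt.Printf("reported: %d bytes, usable after reserve: %d bytes\n", reportedFree, usableFree)
}
```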