Compare commits

...

5 Commits

Author SHA1 Message Date
Josh Yan
5dc5a295bf added testcase 2024-06-03 17:28:05 -07:00
Josh Yan
e21e6b2a33 added testcase 2024-06-03 17:27:38 -07:00
Josh Yan
a240ea3367 humanNumbers formats to 3 digits, added trillion case for future 2024-06-03 17:26:02 -07:00
Jeffrey Morgan
d4a86102fd update welcome prompt in windows to llama3 (#4779) 2024-06-01 21:05:51 -07:00
Jeffrey Morgan
476fb8e892 Limit GPU lib search for now (#4777)
* fix oneapi errors on windows 10
2024-06-01 19:24:33 -07:00
4 changed files with 29 additions and 49 deletions

View File

@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
write-host ""
write-host "Run your first model:"
write-host ""
write-host "`tollama run llama2"
write-host "`tollama run llama3"
write-host ""

View File

@@ -2,32 +2,41 @@ package format
import (
"fmt"
"math"
)
const (
Thousand = 1000
Million = Thousand * 1000
Billion = Million * 1000
Trillion = Billion * 1000
)
func HumanNumber(b uint64) string {
switch {
case b >= Trillion:
number := float64(b) / Trillion
return fmt.Sprintf("%sT", DecimalPlace(number))
case b >= Billion:
number := float64(b) / Billion
if number == math.Floor(number) {
return fmt.Sprintf("%.0fB", number) // no decimals if whole number
}
return fmt.Sprintf("%.1fB", number) // one decimal if not a whole number
return fmt.Sprintf("%sB", DecimalPlace(number))
case b >= Million:
number := float64(b) / Million
if number == math.Floor(number) {
return fmt.Sprintf("%.0fM", number) // no decimals if whole number
}
return fmt.Sprintf("%.2fM", number) // two decimals if not a whole number
return fmt.Sprintf("%sM", DecimalPlace(number))
case b >= Thousand:
return fmt.Sprintf("%.0fK", float64(b)/Thousand)
number := float64(b) / Thousand
return fmt.Sprintf("%sK", DecimalPlace(number))
default:
return fmt.Sprintf("%d", b)
}
}
func DecimalPlace(number float64) string {
switch {
case number >= 100:
return fmt.Sprintf("%.0f", number)
case number >= 10:
return fmt.Sprintf("%.1f", number)
default:
return fmt.Sprintf("%.2f", number)
}
}

View File

@@ -13,14 +13,15 @@ func TestHumanNumber(t *testing.T) {
testCases := []testCase{
{0, "0"},
{1000000, "1M"},
{1000000, "1.00M"},
{125000000, "125M"},
{500500000, "500.50M"},
{500550000, "500.55M"},
{1000000000, "1B"},
{2800000000, "2.8B"},
{2850000000, "2.9B"},
{1000000000000, "1000B"},
{500500000, "500M"},
{500550000, "501M"},
{1000000000, "1.00B"},
{2800000000, "2.80B"},
{2850000000, "2.85B"},
{28550000000, "28.6B"},
{1000000000000, "1.00T"},
}
for _, tc := range testCases {

View File

@@ -16,13 +16,12 @@ import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"unsafe"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
)
type handles struct {
@@ -105,8 +104,6 @@ func initGPUHandles() *handles {
var cudartMgmtPatterns []string
var nvcudaMgmtName string
var nvcudaMgmtPatterns []string
var oneapiMgmtName string
var oneapiMgmtPatterns []string
tmpDir, _ := PayloadsDir()
switch runtime.GOOS {
@@ -118,8 +115,6 @@ func initGPUHandles() *handles {
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "nvcuda.dll"
nvcudaMgmtPatterns = NvcudaWindowsGlobs
oneapiMgmtName = "ze_intel_gpu64.dll"
oneapiMgmtPatterns = OneapiWindowsGlobs
case "linux":
cudartMgmtName = "libcudart.so*"
if tmpDir != "" {
@@ -130,8 +125,6 @@ func initGPUHandles() *handles {
// Aligned with driver, we can't carry as payloads
nvcudaMgmtName = "libcuda.so*"
nvcudaMgmtPatterns = NvcudaLinuxGlobs
oneapiMgmtName = "libze_intel_gpu.so"
oneapiMgmtPatterns = OneapiLinuxGlobs
default:
return gpuHandles
}
@@ -159,17 +152,6 @@ func initGPUHandles() *handles {
}
}
oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
if len(oneapiLibPaths) > 0 {
deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths)
if oneapi != nil {
slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
gpuHandles.oneapi = oneapi
gpuHandles.deviceCount = deviceCount
return gpuHandles
}
}
return gpuHandles
}
@@ -245,18 +227,6 @@ func GetGPUInfo() GpuInfoList {
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
}
if gpuHandles.oneapi != nil {
gpuInfo := GpuInfo{
Library: "oneapi",
}
C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = strconv.Itoa(i)
resp = append(resp, gpuInfo)
}
}
// Then AMD