mirror of
https://github.com/ollama/ollama.git
synced 2026-02-08 06:34:22 -05:00
Remove static VRAM estimation (EstimateVRAM, CheckMemoryRequirements) which wasn't helpful. Instead, report the actual tensor weight size from the manifest for ollama ps. - Remove memory estimation check from runner startup - Remove EstimateVRAM, CheckMemoryRequirements, modelVRAMEstimates - Add TotalTensorSize() to get actual weight size from manifest - Use weight size for Server.vramSize instead of estimates Note: This is better than showing 0 or inaccurate estimates, but the weight size is a drastic underestimation of actual memory usage since it doesn't account for activations, intermediate tensors, or MLX overhead. Future work should query real-time memory from MLX (e.g., MetalGetActiveMemory) for accurate reporting.
79 lines
2.3 KiB
Go
79 lines
2.3 KiB
Go
// Package imagegen provides experimental image generation capabilities for Ollama.
|
|
//
|
|
// This package is in x/ because the tensor model storage format is under development.
|
|
// The goal is to integrate these capabilities into the main Ollama packages once
|
|
// the format is stable.
|
|
//
|
|
// TODO (jmorganca): Integrate into main packages when stable:
|
|
// - CLI commands → cmd/
|
|
// - API endpoints → api/
|
|
// - Model creation → server/
|
|
package imagegen
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"runtime"
|
|
)
|
|
|
|
// SupportedBackends enumerates the compute backends on which image
// generation is available.
var SupportedBackends = []string{"metal", "cuda", "cpu"}
|
|
|
|
// CheckPlatformSupport validates that image generation is supported on the
// current platform. It returns nil when supported, or an error describing
// why it is not.
func CheckPlatformSupport() error {
	if runtime.GOOS == "darwin" {
		// macOS: Metal is supported via MLX, but only on Apple Silicon.
		if runtime.GOARCH != "arm64" {
			return fmt.Errorf("image generation on macOS requires Apple Silicon (arm64), got %s", runtime.GOARCH)
		}
		return nil
	}
	if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
		// Linux/Windows: CUDA support (requires mlx or cuda build).
		// The actual backend availability is checked at runtime.
		return nil
	}
	return fmt.Errorf("image generation is not supported on %s", runtime.GOOS)
}
|
|
|
|
// ResolveModelName checks if a model name is a known image generation model.
|
|
// Returns the normalized model name if found, empty string otherwise.
|
|
func ResolveModelName(modelName string) string {
|
|
manifest, err := LoadManifest(modelName)
|
|
if err == nil && manifest.HasTensorLayers() {
|
|
return modelName
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// DetectModelType reads model_index.json and returns the model type.
|
|
// Checks both "architecture" (Ollama format) and "_class_name" (diffusers format).
|
|
// Returns empty string if detection fails.
|
|
func DetectModelType(modelName string) string {
|
|
manifest, err := LoadManifest(modelName)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
data, err := manifest.ReadConfig("model_index.json")
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
var index struct {
|
|
Architecture string `json:"architecture"`
|
|
ClassName string `json:"_class_name"`
|
|
}
|
|
if err := json.Unmarshal(data, &index); err != nil {
|
|
return ""
|
|
}
|
|
|
|
// Prefer architecture (Ollama format), fall back to _class_name (diffusers)
|
|
if index.Architecture != "" {
|
|
return index.Architecture
|
|
}
|
|
return index.ClassName
|
|
}
|