Files
ollama/x/imagegen/memory.go
Jeffrey Morgan 3b3bf6c217 x/imagegen: replace memory estimation with actual weight size (#13848)
Remove static VRAM estimation (EstimateVRAM, CheckMemoryRequirements)
which wasn't helpful. Instead, report the actual tensor weight size
from the manifest for ollama ps.

- Remove memory estimation check from runner startup
- Remove EstimateVRAM, CheckMemoryRequirements, modelVRAMEstimates
- Add TotalTensorSize() to get actual weight size from manifest
- Use weight size for Server.vramSize instead of estimates

Note: This is better than showing 0 or inaccurate estimates, but the
weight size is a drastic underestimation of actual memory usage since
it doesn't account for activations, intermediate tensors, or MLX
overhead. Future work should query real-time memory from MLX
(e.g., MetalGetActiveMemory) for accurate reporting.
2026-01-22 18:32:41 -08:00

79 lines
2.3 KiB
Go

// Package imagegen provides experimental image generation capabilities for Ollama.
//
// This package is in x/ because the tensor model storage format is under development.
// The goal is to integrate these capabilities into the main Ollama packages once
// the format is stable.
//
// TODO (jmorganca): Integrate into main packages when stable:
// - CLI commands → cmd/
// - API endpoints → api/
// - Model creation → server/
package imagegen
import (
"encoding/json"
"fmt"
"runtime"
)
// SupportedBackends lists the backends that support image generation.
var SupportedBackends = []string{"metal", "cuda", "cpu"}
// CheckPlatformSupport validates that image generation is supported on the current platform.
// Returns nil if supported, or an error describing why it's not supported.
func CheckPlatformSupport() error {
switch runtime.GOOS {
case "darwin":
// macOS: Metal is supported via MLX
if runtime.GOARCH != "arm64" {
return fmt.Errorf("image generation on macOS requires Apple Silicon (arm64), got %s", runtime.GOARCH)
}
return nil
case "linux", "windows":
// Linux/Windows: CUDA support (requires mlx or cuda build)
// The actual backend availability is checked at runtime
return nil
default:
return fmt.Errorf("image generation is not supported on %s", runtime.GOOS)
}
}
// ResolveModelName checks if a model name is a known image generation model.
// Returns the normalized model name if found, empty string otherwise.
func ResolveModelName(modelName string) string {
manifest, err := LoadManifest(modelName)
if err == nil && manifest.HasTensorLayers() {
return modelName
}
return ""
}
// DetectModelType reads model_index.json and returns the model type.
// Checks both "architecture" (Ollama format) and "_class_name" (diffusers format).
// Returns empty string if detection fails.
func DetectModelType(modelName string) string {
manifest, err := LoadManifest(modelName)
if err != nil {
return ""
}
data, err := manifest.ReadConfig("model_index.json")
if err != nil {
return ""
}
var index struct {
Architecture string `json:"architecture"`
ClassName string `json:"_class_name"`
}
if err := json.Unmarshal(data, &index); err != nil {
return ""
}
// Prefer architecture (Ollama format), fall back to _class_name (diffusers)
if index.Architecture != "" {
return index.Architecture
}
return index.ClassName
}