mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 05:36:49 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
47 lines
1.0 KiB
Go
47 lines
1.0 KiB
Go
package vram
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
gguf "github.com/gpustack/gguf-parser-go"
|
|
"github.com/mudler/LocalAI/pkg/downloader"
|
|
)
|
|
|
|
// defaultGGUFReader is a stateless type whose ReadMetadata method loads GGUF
// metadata through the gguf-parser-go library. It carries no fields, so the
// zero value is ready to use.
type defaultGGUFReader struct{}
|
|
|
|
func (defaultGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMeta, error) {
|
|
u := downloader.URI(uri)
|
|
urlStr := u.ResolveURL()
|
|
|
|
if strings.HasPrefix(uri, downloader.LocalPrefix) {
|
|
f, err := gguf.ParseGGUFFile(urlStr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ggufFileToMeta(f), nil
|
|
}
|
|
if !u.LooksLikeHTTPURL() {
|
|
return nil, nil
|
|
}
|
|
f, err := gguf.ParseGGUFFileRemote(ctx, urlStr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ggufFileToMeta(f), nil
|
|
}
|
|
|
|
func ggufFileToMeta(f *gguf.GGUFFile) *GGUFMeta {
|
|
arch := f.Architecture()
|
|
meta := &GGUFMeta{
|
|
BlockCount: uint32(arch.BlockCount),
|
|
EmbeddingLength: uint32(arch.EmbeddingLength),
|
|
HeadCount: uint32(arch.AttentionHeadCount),
|
|
HeadCountKV: uint32(arch.AttentionHeadCountKV),
|
|
}
|
|
if meta.HeadCountKV == 0 {
|
|
meta.HeadCountKV = meta.HeadCount
|
|
}
|
|
return meta
|
|
}
|