mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-30 11:26:32 -04:00
fix(import): derive model name from selected GGUF for repo-root URIs (#10589)
When importing a HuggingFace GGUF model from a repository-root URI (no file component, e.g. hf://owner/repo) with the Model Name field left blank, the importer named the model after the repository (filepath.Base(details.URI)) instead of the GGUF file it actually selected from the repo listing (issue #10587). Track whether the user supplied an explicit name; the URI base is now only a fallback. In the HuggingFace branch, once the model group is picked, re-derive the name from the selected GGUF via a new modelNameFromShardGroup helper that uses ShardGroup.Base minus the .gguf extension. For sharded models this yields a clean logical name (e.g. Qwen3-30B-A3B-Q4_K_M) rather than a shard filename like ...-00001-of-00002. An explicit name preference still always wins, and the .gguf/URL/OCI paths are unchanged. Add network-free unit specs covering name-from-GGUF, clean-name-from-shard-base, and explicit-name precedence, and update the live integration specs that had encoded the previous repo-name behaviour. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Adira Denis Muhando <dennisadira@gmail.com>
This commit is contained in:
@@ -22,11 +22,13 @@ var _ = Describe("DiscoverModelConfig", func() {
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
// No name preference + repo-root URI: the name follows the selected
|
||||
// GGUF file, not the repo (issue #10587).
|
||||
Expect(modelConfig.Name).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
})
|
||||
@@ -38,16 +40,17 @@ var _ = Describe("DiscoverModelConfig", func() {
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
// No name preference: name follows the selected model GGUF (issue #10587).
|
||||
Expect(modelConfig.Name).To(Equal("Qwen3VL-2B-Instruct-Q4_K_M"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/Qwen3-VL-2B-Instruct-GGUF/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/Qwen3-VL-2B-Instruct-GGUF/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/Qwen3VL-2B-Instruct-Q4_K_M/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/Qwen3VL-2B-Instruct-Q4_K_M/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/Qwen3-VL-2B-Instruct-GGUF/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/Qwen3VL-2B-Instruct-Q4_K_M/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].Filename).To(Equal("llama-cpp/mmproj/Qwen3-VL-2B-Instruct-GGUF/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].Filename).To(Equal("llama-cpp/mmproj/Qwen3VL-2B-Instruct-Q4_K_M/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
})
|
||||
@@ -59,16 +62,17 @@ var _ = Describe("DiscoverModelConfig", func() {
|
||||
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
|
||||
Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
// No name preference: name follows the selected Q8_0 model GGUF (issue #10587).
|
||||
Expect(modelConfig.Name).To(Equal("Qwen3VL-2B-Instruct-Q8_0"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/Qwen3-VL-2B-Instruct-GGUF/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/Qwen3-VL-2B-Instruct-GGUF/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: llama-cpp/mmproj/Qwen3VL-2B-Instruct-Q8_0/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: llama-cpp/models/Qwen3VL-2B-Instruct-Q8_0/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/Qwen3-VL-2B-Instruct-GGUF/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/Qwen3VL-2B-Instruct-Q8_0/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].Filename).To(Equal("llama-cpp/mmproj/Qwen3-VL-2B-Instruct-GGUF/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].Filename).To(Equal("llama-cpp/mmproj/Qwen3VL-2B-Instruct-Q8_0/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
|
||||
})
|
||||
|
||||
@@ -98,8 +98,13 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
|
||||
}
|
||||
}
|
||||
|
||||
name, ok := preferencesMap["name"].(string)
|
||||
if !ok {
|
||||
// nameProvided tracks whether the user supplied an explicit model name.
|
||||
// When they didn't, the URI base is only a fallback: for a HuggingFace
|
||||
// repo-root URI (no file component) it would be the repo name, so the HF
|
||||
// branch below re-derives the name from the selected GGUF file instead
|
||||
// (issue #10587).
|
||||
name, nameProvided := preferencesMap["name"].(string)
|
||||
if !nameProvided {
|
||||
name = filepath.Base(details.URI)
|
||||
}
|
||||
|
||||
@@ -227,10 +232,23 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
|
||||
mmprojGroups := hfapi.GroupShards(mmprojFiles)
|
||||
ggufGroups := hfapi.GroupShards(ggufFiles)
|
||||
|
||||
modelGroup := pickPreferredGroup(ggufGroups, quants)
|
||||
|
||||
// A repo-root URI has no file component, so the URI-base fallback
|
||||
// above produced the repo name. When the user left the name blank,
|
||||
// derive it from the GGUF file actually selected from the listing so
|
||||
// the gallery entry and `model:` directory reflect the model, not the
|
||||
// repository (issue #10587). An explicit name preference always wins.
|
||||
if !nameProvided && modelGroup != nil {
|
||||
name = modelNameFromShardGroup(*modelGroup)
|
||||
modelConfig.Name = name
|
||||
cfg.Name = name
|
||||
}
|
||||
|
||||
// Emit the model group first so cfg.Files[0] is the model — callers
|
||||
// and tests rely on the model file preceding any mmproj companion.
|
||||
if group := pickPreferredGroup(ggufGroups, quants); group != nil {
|
||||
appendShardGroup(&cfg, *group, filepath.Join("llama-cpp", "models", name))
|
||||
if modelGroup != nil {
|
||||
appendShardGroup(&cfg, *modelGroup, filepath.Join("llama-cpp", "models", name))
|
||||
}
|
||||
if group := pickPreferredGroup(mmprojGroups, mmprojQuantsList); group != nil {
|
||||
appendShardGroup(&cfg, *group, filepath.Join("llama-cpp", "mmproj", name))
|
||||
@@ -281,6 +299,20 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// modelNameFromShardGroup derives a human-facing model name from the picked
|
||||
// GGUF group: the logical base filename with its .gguf extension stripped.
|
||||
// ShardGroup.Base is the common prefix for sharded sets (without the
|
||||
// -NNNNN-of-MMMMM suffix) and the sole basename for single-file models, so
|
||||
// this yields a clean name like "model-Q4_K_M" rather than an individual
|
||||
// shard filename or the repo-root URI base.
|
||||
func modelNameFromShardGroup(group hfapi.ShardGroup) string {
|
||||
base := group.Base
|
||||
if ext := filepath.Ext(base); strings.EqualFold(ext, ".gguf") {
|
||||
base = strings.TrimSuffix(base, ext)
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// pickPreferredGroup walks the preference list in priority order and returns
|
||||
// the first group whose base filename contains any preference. When nothing
|
||||
// matches, the last group wins — this preserves the historical "if the user
|
||||
|
||||
@@ -372,6 +372,62 @@ var _ = Describe("LlamaCPPImporter", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Files).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("derives the model name from the selected GGUF when no name is given", func() {
|
||||
// Regression for #10587: a repo-root URI has no file component, so
|
||||
// the URI base ("example-GGUF") is just the repo name. With the
|
||||
// name field left blank, the emitted name and model directory must
|
||||
// follow the GGUF file actually selected, not the repository.
|
||||
details := withHF(`{"quantizations":"Q4_K_M"}`,
|
||||
hfFile("Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", "aaa"),
|
||||
hfFile("Meta-Llama-3-8B-Instruct.Q3_K_M.gguf", "bbb"),
|
||||
)
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("Meta-Llama-3-8B-Instruct.Q4_K_M"))
|
||||
Expect(modelConfig.Files).To(HaveLen(1), fmt.Sprintf("%+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal(
|
||||
"llama-cpp/models/Meta-Llama-3-8B-Instruct.Q4_K_M/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring("name: Meta-Llama-3-8B-Instruct.Q4_K_M"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring(
|
||||
"model: llama-cpp/models/Meta-Llama-3-8B-Instruct.Q4_K_M/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"))
|
||||
})
|
||||
|
||||
It("derives a clean name from the shard base for split GGUFs when no name is given", func() {
|
||||
// The selected primary file is shard 1; using its raw basename
|
||||
// would leak the -00001-of-00002 suffix into the name. The shard
|
||||
// base must be used so the name is the logical model.
|
||||
details := withHF(``,
|
||||
hfFile("Qwen3-30B-A3B-Q4_K_M-00001-of-00002.gguf", "p1"),
|
||||
hfFile("Qwen3-30B-A3B-Q4_K_M-00002-of-00002.gguf", "p2"),
|
||||
)
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("Qwen3-30B-A3B-Q4_K_M"))
|
||||
Expect(modelConfig.Files).To(HaveLen(2), fmt.Sprintf("%+v", modelConfig))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal(
|
||||
"llama-cpp/models/Qwen3-30B-A3B-Q4_K_M/Qwen3-30B-A3B-Q4_K_M-00001-of-00002.gguf"))
|
||||
Expect(modelConfig.ConfigFile).To(ContainSubstring(
|
||||
"model: llama-cpp/models/Qwen3-30B-A3B-Q4_K_M/Qwen3-30B-A3B-Q4_K_M-00001-of-00002.gguf"))
|
||||
})
|
||||
|
||||
It("keeps an explicit name over the selected GGUF filename", func() {
|
||||
// Precedence guard: when the user supplies a name it always wins,
|
||||
// even though a GGUF file was selected from the listing.
|
||||
details := withHF(`{"name":"my-custom-name","quantizations":"Q4_K_M"}`,
|
||||
hfFile("model-Q4_K_M.gguf", "aaa"),
|
||||
)
|
||||
|
||||
modelConfig, err := importer.Import(details)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(modelConfig.Name).To(Equal("my-custom-name"))
|
||||
Expect(modelConfig.Files[0].Filename).To(Equal("llama-cpp/models/my-custom-name/model-Q4_K_M.gguf"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("quant token boundary matching", func() {
|
||||
|
||||
Reference in New Issue
Block a user