Compare commits

...

3 Commits

Author SHA1 Message Date
LocalAI [bot]
b941732f54 ⬆️ Update ggerganov/llama.cpp (#2696)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-01 22:52:43 +02:00
Ettore Di Giacinto
e591ff2e74 fix(initializer): do select backends that exist (#2694)
we were not checking if the binary exists before picking these up from the asset dir (a standalone sketch of this check follows the changed-files summary below).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-01 22:50:36 +02:00
Ettore Di Giacinto
bd2f95c130 feat(backend): fallback with autodetect (#2693)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-01 18:11:04 +02:00
3 changed files with 51 additions and 7 deletions
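
For readers skimming the compare, e591ff2e74 amounts to a stat-before-select guard: a llama.cpp variant is only picked from the asset directory if its binary is actually on disk. Below is a minimal standalone sketch of that guard; the backendPath helper and the <assetDir>/backend-assets/grpc/<name> layout are assumptions for the example, not LocalAI's exact API.

// Sketch only: stat the backend binary before selecting it (the pattern added
// in e591ff2e74). backendPath below is a stand-in, not LocalAI's helper.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// backendPath mimics an asset-dir layout of <assetDir>/backend-assets/grpc/<name>.
func backendPath(assetDir, name string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", name)
}

func main() {
	p := backendPath(".", "llama-cpp-avx2")
	if _, err := os.Stat(p); err == nil {
		fmt.Println("selecting backend:", p)
	} else {
		fmt.Println("binary not present, keeping previous choice:", err)
	}
}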

Dockerfile

@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
## Build the binary
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

Makefile

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
+CPPLLAMA_VERSION?=cb5fad4c6c2cbef92e9b8b63449e1cb7664e4846
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

pkg/model/initializers.go

@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	}
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		p := backendPath(assetDir, LLamaCPPAVX2)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		p := backendPath(assetDir, LLamaCPPAVX)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = p
+		}
 	} else {
-		log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		p := backendPath(assetDir, LLamaCPPFallback)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = p
+		}
 	}
 	return grpcProcess
@@ -509,6 +518,39 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 			err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
 			log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
 		}
+
+		if autoDetect && key == LLamaCPP && err != nil {
+			// try as hard as possible to run the llama.cpp variants
+			backendToUse := ""
+			if xsysinfo.HasCPUCaps(cpuid.AVX2) {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX2
+				}
+			} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX
+				}
+			} else {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
+					backendToUse = LLamaCPPFallback
+				} else {
+					// If we don't have a fallback, just skip fallback
+					continue
+				}
+			}
+
+			// Autodetection failed, try the fallback
+			log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
+			options = append(options, WithBackendString(backendToUse))
+			model, modelerr = ml.BackendLoader(options...)
+			if modelerr == nil && model != nil {
+				log.Info().Msgf("[%s] Loads OK", key)
+				return model, nil
+			} else {
+				err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
+				log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
+			}
+		}
 	}
 
 	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
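
Taken together, bd2f95c130 and e591ff2e74 make the loader pick a llama.cpp variant from the CPU's capabilities and then only run a binary that is actually shipped. The sketch below reads that as a simple preference list (AVX2, then AVX, then the plain fallback); it calls github.com/klauspost/cpuid/v2 directly instead of LocalAI's xsysinfo wrapper, and the variant file names and asset directory are assumptions, so treat it as an illustration of the idea rather than the exact control flow of the diff above.

// Illustration only: choose a llama.cpp variant from CPU capabilities, but
// only accept binaries that exist on disk. Variant names and the asset
// directory are assumptions for the example.
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/klauspost/cpuid/v2"
)

// selectVariant returns the path of the most capable variant that is present,
// or "" if none of the candidates exist.
func selectVariant(assetDir string) string {
	var candidates []string
	switch {
	case cpuid.CPU.Supports(cpuid.AVX2):
		candidates = []string{"llama-cpp-avx2", "llama-cpp-avx", "llama-cpp-fallback"}
	case cpuid.CPU.Supports(cpuid.AVX):
		candidates = []string{"llama-cpp-avx", "llama-cpp-fallback"}
	default:
		candidates = []string{"llama-cpp-fallback"}
	}
	for _, name := range candidates {
		p := filepath.Join(assetDir, name)
		if _, err := os.Stat(p); err == nil {
			return p
		}
	}
	return ""
}

func main() {
	if p := selectVariant("backend-assets/grpc"); p != "" {
		fmt.Println("would start:", p)
	} else {
		fmt.Println("no llama.cpp variant available")
	}
}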