Compare commits

...

3 Commits

Author SHA1 Message Date
LocalAI [bot]
b941732f54 ⬆️ Update ggerganov/llama.cpp (#2696)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-07-01 22:52:43 +02:00
Ettore Di Giacinto
e591ff2e74 fix(initializer): do select backends that exist (#2694)
we were not checking if the binary exists before picking these up from the asset dir (a standalone sketch of this check follows the changed-files summary below).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-01 22:50:36 +02:00
Ettore Di Giacinto
bd2f95c130 feat(backend): fallback with autodetect (#2693)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-07-01 18:11:04 +02:00
3 changed files with 51 additions and 7 deletions
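
For readers skimming the compare, e591ff2e74 amounts to a stat-before-select guard: a llama.cpp variant is only picked from the asset directory if its binary is actually on disk. Below is a minimal standalone sketch of that guard; the backendPath helper and the <assetDir>/backend-assets/grpc/<name> layout are assumptions for the example, not LocalAI's exact API.

// Sketch only: stat the backend binary before selecting it (the pattern added
// in e591ff2e74). backendPath below is a stand-in, not LocalAI's helper.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// backendPath mimics an asset-dir layout of <assetDir>/backend-assets/grpc/<name>.
func backendPath(assetDir, name string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", name)
}

func main() {
	p := backendPath(".", "llama-cpp-avx2")
	if _, err := os.Stat(p); err == nil {
		fmt.Println("selecting backend:", p)
	} else {
		fmt.Println("binary not present, keeping previous choice:", err)
	}
}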

Dockerfile

@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
# Rebuild with defaults backends
WORKDIR /build
## Build the binary
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

Makefile

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
+CPPLLAMA_VERSION?=cb5fad4c6c2cbef92e9b8b63449e1cb7664e4846
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

pkg/model/initializers.go

@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	}
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		p := backendPath(assetDir, LLamaCPPAVX2)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		p := backendPath(assetDir, LLamaCPPAVX)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = p
+		}
 	} else {
-		log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		p := backendPath(assetDir, LLamaCPPFallback)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = p
+		}
 	}
 	return grpcProcess
@@ -509,6 +518,39 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 			err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
 			log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
 		}
+
+		if autoDetect && key == LLamaCPP && err != nil {
+			// try as hard as possible to run the llama.cpp variants
+			backendToUse := ""
+			if xsysinfo.HasCPUCaps(cpuid.AVX2) {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX2
+				}
+			} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX
+				}
+			} else {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
+					backendToUse = LLamaCPPFallback
+				} else {
+					// If we don't have a fallback, just skip fallback
+					continue
+				}
+			}
+
+			// Autodetection failed, try the fallback
+			log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
+			options = append(options, WithBackendString(backendToUse))
+			model, modelerr = ml.BackendLoader(options...)
+			if modelerr == nil && model != nil {
+				log.Info().Msgf("[%s] Loads OK", key)
+				return model, nil
+			} else {
+				err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
+				log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
+			}
+		}
 	}
 
 	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
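
Taken together, bd2f95c130 and e591ff2e74 make the loader pick a llama.cpp variant from the CPU's capabilities and then only run a binary that is actually shipped. The sketch below reads that as a simple preference list (AVX2, then AVX, then the plain fallback); it calls github.com/klauspost/cpuid/v2 directly instead of LocalAI's xsysinfo wrapper, and the variant file names and asset directory are assumptions, so treat it as an illustration of the idea rather than the exact control flow of the diff above.

// Illustration only: choose a llama.cpp variant from CPU capabilities, but
// only accept binaries that exist on disk. Variant names and the asset
// directory are assumptions for the example.
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/klauspost/cpuid/v2"
)

// selectVariant returns the path of the most capable variant that is present,
// or "" if none of the candidates exist.
func selectVariant(assetDir string) string {
	var candidates []string
	switch {
	case cpuid.CPU.Supports(cpuid.AVX2):
		candidates = []string{"llama-cpp-avx2", "llama-cpp-avx", "llama-cpp-fallback"}
	case cpuid.CPU.Supports(cpuid.AVX):
		candidates = []string{"llama-cpp-avx", "llama-cpp-fallback"}
	default:
		candidates = []string{"llama-cpp-fallback"}
	}
	for _, name := range candidates {
		p := filepath.Join(assetDir, name)
		if _, err := os.Stat(p); err == nil {
			return p
		}
	}
	return ""
}

func main() {
	if p := selectVariant("backend-assets/grpc"); p != "" {
		fmt.Println("would start:", p)
	} else {
		fmt.Println("no llama.cpp variant available")
	}
}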