mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
3 Commits
docker_ima
...
v2.18.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b941732f54 | ||
|
|
e591ff2e74 | ||
|
|
bd2f95c130 |
@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
# Rebuild with defaults backends
|
||||
WORKDIR /build
|
||||
|
||||
## Build the binary
|
||||
RUN make build
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
|
||||
2
Makefile
2
Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
|
||||
CPPLLAMA_VERSION?=cb5fad4c6c2cbef92e9b8b63449e1cb7664e4846
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
|
||||
@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
|
||||
}
|
||||
|
||||
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
|
||||
grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
|
||||
p := backendPath(assetDir, LLamaCPPAVX2)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
|
||||
grpcProcess = p
|
||||
}
|
||||
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
|
||||
grpcProcess = backendPath(assetDir, LLamaCPPAVX)
|
||||
p := backendPath(assetDir, LLamaCPPAVX)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
|
||||
grpcProcess = p
|
||||
}
|
||||
} else {
|
||||
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
|
||||
grpcProcess = backendPath(assetDir, LLamaCPPFallback)
|
||||
p := backendPath(assetDir, LLamaCPPFallback)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
|
||||
grpcProcess = p
|
||||
}
|
||||
}
|
||||
|
||||
return grpcProcess
|
||||
@@ -509,6 +518,39 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
||||
err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
|
||||
log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
|
||||
}
|
||||
|
||||
if autoDetect && key == LLamaCPP && err != nil {
|
||||
// try as hard as possible to run the llama.cpp variants
|
||||
backendToUse := ""
|
||||
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
|
||||
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
|
||||
backendToUse = LLamaCPPAVX2
|
||||
}
|
||||
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
|
||||
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
|
||||
backendToUse = LLamaCPPAVX
|
||||
}
|
||||
} else {
|
||||
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
|
||||
backendToUse = LLamaCPPFallback
|
||||
} else {
|
||||
// If we don't have a fallback, just skip fallback
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Autodetection failed, try the fallback
|
||||
log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
|
||||
options = append(options, WithBackendString(backendToUse))
|
||||
model, modelerr = ml.BackendLoader(options...)
|
||||
if modelerr == nil && model != nil {
|
||||
log.Info().Msgf("[%s] Loads OK", key)
|
||||
return model, nil
|
||||
} else {
|
||||
err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
|
||||
log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
|
||||
Reference in New Issue
Block a user