mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-25 01:02:05 -04:00
Compare commits
6 Commits
copilot/up
...
workaround
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
59343860ef | ||
|
|
2de30440fe | ||
|
|
673a80a578 | ||
|
|
2554e9fabe | ||
|
|
5bfc3eebf8 | ||
|
|
ab893fe302 |
@@ -16,7 +16,7 @@ RUN apt-get update && \
|
|||||||
|
|
||||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||||
FROM requirements AS requirements-drivers
|
FROM requirements AS requirements-drivers
|
||||||
|
ARG VULKAN_FROM_SOURCE=false
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=12
|
||||||
ARG CUDA_MINOR_VERSION=0
|
ARG CUDA_MINOR_VERSION=0
|
||||||
@@ -41,7 +41,7 @@ RUN <<EOT bash
|
|||||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils mesa-vulkan-drivers
|
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils mesa-vulkan-drivers
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
@@ -59,6 +59,11 @@ RUN <<EOT bash
|
|||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||||
rm -rf /opt/vulkan-sdk
|
rm -rf /opt/vulkan-sdk
|
||||||
|
elif [ "amd64" = "${TARGETARCH}" ]; then
|
||||||
|
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||||
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y vulkan-sdk
|
||||||
fi
|
fi
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
mkdir vulkan && cd vulkan && \
|
mkdir vulkan && cd vulkan && \
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ RUN <<EOT bash
|
|||||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
@@ -64,6 +64,11 @@ RUN <<EOT bash
|
|||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||||
rm -rf /opt/vulkan-sdk
|
rm -rf /opt/vulkan-sdk
|
||||||
|
elif [ "amd64" = "${TARGETARCH}" ]; then
|
||||||
|
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||||
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y vulkan-sdk
|
||||||
fi
|
fi
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
mkdir vulkan && cd vulkan && \
|
mkdir vulkan && cd vulkan && \
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ RUN <<EOT bash
|
|||||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
@@ -121,6 +121,11 @@ RUN <<EOT bash
|
|||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||||
rm -rf /opt/vulkan-sdk
|
rm -rf /opt/vulkan-sdk
|
||||||
|
elif [ "amd64" = "${TARGETARCH}" ]; then
|
||||||
|
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||||
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y vulkan-sdk
|
||||||
fi
|
fi
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
mkdir vulkan && cd vulkan && \
|
mkdir vulkan && cd vulkan && \
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ RUN <<EOT bash
|
|||||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||||
@@ -78,6 +78,11 @@ RUN <<EOT bash
|
|||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||||
rm -rf /opt/vulkan-sdk
|
rm -rf /opt/vulkan-sdk
|
||||||
|
elif [ "amd64" = "${TARGETARCH}" ]; then
|
||||||
|
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||||
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y vulkan-sdk
|
||||||
fi
|
fi
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
mkdir vulkan && cd vulkan && \
|
mkdir vulkan && cd vulkan && \
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=593da7fa49503b68f9f01700be9f508f1e528992
|
LLAMA_VERSION?=b1377188784f9aea26b8abde56d4aee8c733eec7
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -17,4 +17,9 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
fi
|
fi
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
||||||
|
|
||||||
|
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||||
|
USE_PIP=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|||||||
5
backend/python/chatterbox/requirements-install.txt
Normal file
5
backend/python/chatterbox/requirements-install.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Build dependencies needed for packages installed from source (e.g., git dependencies)
|
||||||
|
# When using --no-build-isolation, these must be installed in the venv first
|
||||||
|
wheel
|
||||||
|
setuptools
|
||||||
|
packaging
|
||||||
@@ -16,6 +16,10 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||||
|
USE_PIP=true
|
||||||
|
fi
|
||||||
|
|
||||||
# Use python 3.12 for l4t
|
# Use python 3.12 for l4t
|
||||||
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||||
PYTHON_VERSION="3.12"
|
PYTHON_VERSION="3.12"
|
||||||
|
|||||||
@@ -16,4 +16,8 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||||
|
USE_PIP=true
|
||||||
|
fi
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|||||||
@@ -26,6 +26,12 @@ fi
|
|||||||
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
||||||
|
|
||||||
|
|
||||||
|
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||||
|
USE_PIP=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
git clone https://github.com/neuphonic/neutts-air neutts-air
|
git clone https://github.com/neuphonic/neutts-air neutts-air
|
||||||
|
|
||||||
cp -rfv neutts-air/neuttsair ./
|
cp -rfv neutts-air/neuttsair ./
|
||||||
|
|||||||
@@ -23,6 +23,10 @@ if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
|||||||
PY_STANDALONE_TAG="20251120"
|
PY_STANDALONE_TAG="20251120"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||||
|
USE_PIP=true
|
||||||
|
fi
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
git clone https://github.com/microsoft/VibeVoice.git
|
git clone https://github.com/microsoft/VibeVoice.git
|
||||||
|
|||||||
@@ -63,6 +63,25 @@ func (m *GalleryBackend) IsMeta() bool {
|
|||||||
return len(m.CapabilitiesMap) > 0 && m.URI == ""
|
return len(m.CapabilitiesMap) > 0 && m.URI == ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsCompatibleWith checks if the backend is compatible with the current system capability.
|
||||||
|
// For meta backends, it checks if any of the capabilities in the map match the system capability.
|
||||||
|
// For concrete backends, it delegates to SystemState.IsBackendCompatible.
|
||||||
|
func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool {
|
||||||
|
if systemState == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Meta backends are compatible if the system capability matches one of the keys
|
||||||
|
if m.IsMeta() {
|
||||||
|
capability := systemState.Capability(m.CapabilitiesMap)
|
||||||
|
_, exists := m.CapabilitiesMap[capability]
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
|
// For concrete backends, delegate to the system package
|
||||||
|
return systemState.IsBackendCompatible(m.Name, m.URI)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *GalleryBackend) SetInstalled(installed bool) {
|
func (m *GalleryBackend) SetInstalled(installed bool) {
|
||||||
m.Installed = installed
|
m.Installed = installed
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -172,6 +172,252 @@ var _ = Describe("Gallery Backends", func() {
|
|||||||
Expect(nilMetaBackend.IsMeta()).To(BeFalse())
|
Expect(nilMetaBackend.IsMeta()).To(BeFalse())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should check IsCompatibleWith correctly for meta backends", func() {
|
||||||
|
metaBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "meta-backend",
|
||||||
|
},
|
||||||
|
CapabilitiesMap: map[string]string{
|
||||||
|
"nvidia": "nvidia-backend",
|
||||||
|
"amd": "amd-backend",
|
||||||
|
"default": "default-backend",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test with nil state - should be compatible
|
||||||
|
Expect(metaBackend.IsCompatibleWith(nil)).To(BeTrue())
|
||||||
|
|
||||||
|
// Test with NVIDIA system - should be compatible (has nvidia key)
|
||||||
|
nvidiaState := &system.SystemState{GPUVendor: "nvidia", VRAM: 8 * 1024 * 1024 * 1024}
|
||||||
|
Expect(metaBackend.IsCompatibleWith(nvidiaState)).To(BeTrue())
|
||||||
|
|
||||||
|
// Test with default (no GPU) - should be compatible (has default key)
|
||||||
|
defaultState := &system.SystemState{}
|
||||||
|
Expect(metaBackend.IsCompatibleWith(defaultState)).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("IsCompatibleWith for concrete backends", func() {
|
||||||
|
Context("CPU backends", func() {
|
||||||
|
It("should be compatible on all systems", func() {
|
||||||
|
cpuBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "cpu-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||||
|
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("Darwin/Metal backends", func() {
|
||||||
|
When("running on darwin", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
if runtime.GOOS != "darwin" {
|
||||||
|
Skip("Skipping darwin-specific tests on non-darwin system")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible for MLX backend", func() {
|
||||||
|
mlxBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "mlx",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx",
|
||||||
|
}
|
||||||
|
Expect(mlxBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible for metal-llama-cpp backend", func() {
|
||||||
|
metalBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "metal-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(metalBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
When("running on non-darwin", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
Skip("Skipping non-darwin-specific tests on darwin system")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should NOT be compatible for MLX backend", func() {
|
||||||
|
mlxBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "mlx",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx",
|
||||||
|
}
|
||||||
|
Expect(mlxBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should NOT be compatible for metal-llama-cpp backend", func() {
|
||||||
|
metalBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "metal-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(metalBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("NVIDIA/CUDA backends", func() {
|
||||||
|
When("running on non-darwin", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
Skip("Skipping CUDA tests on darwin system")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should NOT be compatible without nvidia GPU", func() {
|
||||||
|
cudaBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "cuda12-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||||
|
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with nvidia GPU", func() {
|
||||||
|
cudaBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "cuda12-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with cuda13 backend on nvidia GPU", func() {
|
||||||
|
cuda13Backend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "cuda13-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(cuda13Backend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("AMD/ROCm backends", func() {
|
||||||
|
When("running on non-darwin", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
Skip("Skipping AMD/ROCm tests on darwin system")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should NOT be compatible without AMD GPU", func() {
|
||||||
|
rocmBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "rocm-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||||
|
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with AMD GPU", func() {
|
||||||
|
rocmBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "rocm-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with hipblas backend on AMD GPU", func() {
|
||||||
|
hipBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "hip-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-hip-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(hipBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("Intel/SYCL backends", func() {
|
||||||
|
When("running on non-darwin", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
Skip("Skipping Intel/SYCL tests on darwin system")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should NOT be compatible without Intel GPU", func() {
|
||||||
|
intelBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "intel-sycl-f16-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(intelBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||||
|
Expect(intelBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with Intel GPU", func() {
|
||||||
|
intelBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "intel-sycl-f16-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(intelBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with intel-sycl-f32 backend on Intel GPU", func() {
|
||||||
|
intelF32Backend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "intel-sycl-f32-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp",
|
||||||
|
}
|
||||||
|
Expect(intelF32Backend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be compatible with intel-transformers backend on Intel GPU", func() {
|
||||||
|
intelTransformersBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "intel-transformers",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-intel-transformers",
|
||||||
|
}
|
||||||
|
Expect(intelTransformersBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("Vulkan backends", func() {
|
||||||
|
It("should be compatible on CPU-only systems", func() {
|
||||||
|
// Vulkan backends don't have a specific GPU vendor requirement in the current logic
|
||||||
|
// They are compatible if no other GPU-specific pattern matches
|
||||||
|
vulkanBackend := &GalleryBackend{
|
||||||
|
Metadata: Metadata{
|
||||||
|
Name: "vulkan-llama-cpp",
|
||||||
|
},
|
||||||
|
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp",
|
||||||
|
}
|
||||||
|
// Vulkan doesn't have vendor-specific filtering in current implementation
|
||||||
|
Expect(vulkanBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
It("should find best backend from meta based on system capabilities", func() {
|
It("should find best backend from meta based on system capabilities", func() {
|
||||||
|
|
||||||
metaBackend := &GalleryBackend{
|
metaBackend := &GalleryBackend{
|
||||||
|
|||||||
@@ -226,6 +226,16 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
|
|||||||
|
|
||||||
// List available backends
|
// List available backends
|
||||||
func AvailableBackends(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
func AvailableBackends(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
||||||
|
return availableBackendsWithFilter(galleries, systemState, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AvailableBackendsUnfiltered returns all available backends without filtering by system capability.
|
||||||
|
func AvailableBackendsUnfiltered(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
||||||
|
return availableBackendsWithFilter(galleries, systemState, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// availableBackendsWithFilter is a helper function that lists available backends with optional filtering.
|
||||||
|
func availableBackendsWithFilter(galleries []config.Gallery, systemState *system.SystemState, filterByCapability bool) (GalleryElements[*GalleryBackend], error) {
|
||||||
var backends []*GalleryBackend
|
var backends []*GalleryBackend
|
||||||
|
|
||||||
systemBackends, err := ListSystemBackends(systemState)
|
systemBackends, err := ListSystemBackends(systemState)
|
||||||
@@ -241,7 +251,17 @@ func AvailableBackends(galleries []config.Gallery, systemState *system.SystemSta
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
backends = append(backends, galleryBackends...)
|
|
||||||
|
// Filter backends by system capability if requested
|
||||||
|
if filterByCapability {
|
||||||
|
for _, backend := range galleryBackends {
|
||||||
|
if backend.IsCompatibleWith(systemState) {
|
||||||
|
backends = append(backends, backend)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
backends = append(backends, galleryBackends...)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return backends, nil
|
return backends, nil
|
||||||
|
|||||||
@@ -617,6 +617,12 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
|||||||
installedBackendsCount = len(installedBackends)
|
installedBackendsCount = len(installedBackends)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get the detected system capability
|
||||||
|
detectedCapability := ""
|
||||||
|
if appConfig.SystemState != nil {
|
||||||
|
detectedCapability = appConfig.SystemState.DetectedCapability()
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(200, map[string]interface{}{
|
return c.JSON(200, map[string]interface{}{
|
||||||
"backends": backendsJSON,
|
"backends": backendsJSON,
|
||||||
"repositories": appConfig.BackendGalleries,
|
"repositories": appConfig.BackendGalleries,
|
||||||
@@ -629,6 +635,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
|||||||
"totalPages": totalPages,
|
"totalPages": totalPages,
|
||||||
"prevPage": prevPage,
|
"prevPage": prevPage,
|
||||||
"nextPage": nextPage,
|
"nextPage": nextPage,
|
||||||
|
"systemCapability": detectedCapability,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -54,6 +54,11 @@
|
|||||||
<span class="font-semibold text-cyan-300" x-text="installedBackends"></span>
|
<span class="font-semibold text-cyan-300" x-text="installedBackends"></span>
|
||||||
<span class="text-[#94A3B8] ml-1">installed</span>
|
<span class="text-[#94A3B8] ml-1">installed</span>
|
||||||
</a>
|
</a>
|
||||||
|
<div class="flex items-center bg-[#101827] rounded-lg px-4 py-2 border border-[#38BDF8]/30">
|
||||||
|
<i class="fas fa-microchip text-[#38BDF8] mr-2"></i>
|
||||||
|
<span class="text-[#94A3B8] mr-1">Capability:</span>
|
||||||
|
<span class="font-semibold text-[#38BDF8]" x-text="systemCapability"></span>
|
||||||
|
</div>
|
||||||
<a href="https://localai.io/backends/" target="_blank" class="btn-primary">
|
<a href="https://localai.io/backends/" target="_blank" class="btn-primary">
|
||||||
<i class="fas fa-info-circle mr-2"></i>
|
<i class="fas fa-info-circle mr-2"></i>
|
||||||
<span>Documentation</span>
|
<span>Documentation</span>
|
||||||
@@ -588,6 +593,7 @@ function backendsGallery() {
|
|||||||
totalPages: 1,
|
totalPages: 1,
|
||||||
availableBackends: 0,
|
availableBackends: 0,
|
||||||
installedBackends: 0,
|
installedBackends: 0,
|
||||||
|
systemCapability: '',
|
||||||
selectedBackend: null,
|
selectedBackend: null,
|
||||||
jobProgress: {},
|
jobProgress: {},
|
||||||
notifications: [],
|
notifications: [],
|
||||||
@@ -683,6 +689,7 @@ function backendsGallery() {
|
|||||||
this.totalPages = data.totalPages || 1;
|
this.totalPages = data.totalPages || 1;
|
||||||
this.availableBackends = data.availableBackends || 0;
|
this.availableBackends = data.availableBackends || 0;
|
||||||
this.installedBackends = data.installedBackends || 0;
|
this.installedBackends = data.installedBackends || 0;
|
||||||
|
this.systemCapability = data.systemCapability || 'default';
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching backends:', error);
|
console.error('Error fetching backends:', error);
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@@ -24,8 +24,6 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
|||||||
return fmt.Errorf("model %s not found", s)
|
return fmt.Errorf("model %s not found", s)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer delete(ml.models, s)
|
|
||||||
|
|
||||||
retries := 1
|
retries := 1
|
||||||
for model.GRPC(false, ml.wd).IsBusy() {
|
for model.GRPC(false, ml.wd).IsBusy() {
|
||||||
xlog.Debug("Model busy. Waiting.", "model", s)
|
xlog.Debug("Model busy. Waiting.", "model", s)
|
||||||
@@ -48,6 +46,7 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
|||||||
if process == nil {
|
if process == nil {
|
||||||
xlog.Error("No process", "model", s)
|
xlog.Error("No process", "model", s)
|
||||||
// Nothing to do as there is no process
|
// Nothing to do as there is no process
|
||||||
|
delete(ml.models, s)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -56,6 +55,10 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
|||||||
xlog.Error("(deleteProcess) error while deleting process", "error", err, "model", s)
|
xlog.Error("(deleteProcess) error while deleting process", "error", err, "model", s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
delete(ml.models, s)
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,15 +12,17 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
// Public constants - used by tests and external packages
|
||||||
|
Nvidia = "nvidia"
|
||||||
|
AMD = "amd"
|
||||||
|
Intel = "intel"
|
||||||
|
|
||||||
|
// Private constants - only used within this package
|
||||||
defaultCapability = "default"
|
defaultCapability = "default"
|
||||||
nvidiaL4T = "nvidia-l4t"
|
nvidiaL4T = "nvidia-l4t"
|
||||||
darwinX86 = "darwin-x86"
|
darwinX86 = "darwin-x86"
|
||||||
metal = "metal"
|
metal = "metal"
|
||||||
nvidia = "nvidia"
|
vulkan = "vulkan"
|
||||||
|
|
||||||
amd = "amd"
|
|
||||||
intel = "intel"
|
|
||||||
vulkan = "vulkan"
|
|
||||||
|
|
||||||
nvidiaCuda13 = "nvidia-cuda-13"
|
nvidiaCuda13 = "nvidia-cuda-13"
|
||||||
nvidiaCuda12 = "nvidia-cuda-12"
|
nvidiaCuda12 = "nvidia-cuda-12"
|
||||||
@@ -30,6 +32,16 @@ const (
|
|||||||
capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
|
capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
|
||||||
capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
|
capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
|
||||||
defaultRunFile = "/run/localai/capability"
|
defaultRunFile = "/run/localai/capability"
|
||||||
|
|
||||||
|
// Backend detection tokens (private)
|
||||||
|
backendTokenDarwin = "darwin"
|
||||||
|
backendTokenMLX = "mlx"
|
||||||
|
backendTokenMetal = "metal"
|
||||||
|
backendTokenL4T = "l4t"
|
||||||
|
backendTokenCUDA = "cuda"
|
||||||
|
backendTokenROCM = "rocm"
|
||||||
|
backendTokenHIP = "hip"
|
||||||
|
backendTokenSYCL = "sycl"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -96,7 +108,7 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||||||
|
|
||||||
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
||||||
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
|
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
|
||||||
if s.GPUVendor == nvidia {
|
if s.GPUVendor == Nvidia {
|
||||||
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
||||||
if cuda13DirExists {
|
if cuda13DirExists {
|
||||||
return nvidiaL4TCuda13
|
return nvidiaL4TCuda13
|
||||||
@@ -131,7 +143,6 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||||||
return s.GPUVendor
|
return s.GPUVendor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
||||||
// backend implementation order for the current system capability. Callers can use
|
// backend implementation order for the current system capability. Callers can use
|
||||||
// these tokens to select the most appropriate concrete backend among multiple
|
// these tokens to select the most appropriate concrete backend among multiple
|
||||||
@@ -139,19 +150,76 @@ func (s *SystemState) getSystemCapabilities() string {
|
|||||||
func (s *SystemState) BackendPreferenceTokens() []string {
|
func (s *SystemState) BackendPreferenceTokens() []string {
|
||||||
capStr := strings.ToLower(s.getSystemCapabilities())
|
capStr := strings.ToLower(s.getSystemCapabilities())
|
||||||
switch {
|
switch {
|
||||||
case strings.HasPrefix(capStr, nvidia):
|
case strings.HasPrefix(capStr, Nvidia):
|
||||||
return []string{"cuda", "vulkan", "cpu"}
|
return []string{backendTokenCUDA, vulkan, "cpu"}
|
||||||
case strings.HasPrefix(capStr, amd):
|
case strings.HasPrefix(capStr, AMD):
|
||||||
return []string{"rocm", "hip", "vulkan", "cpu"}
|
return []string{backendTokenROCM, backendTokenHIP, vulkan, "cpu"}
|
||||||
case strings.HasPrefix(capStr, intel):
|
case strings.HasPrefix(capStr, Intel):
|
||||||
return []string{"sycl", intel, "cpu"}
|
return []string{backendTokenSYCL, Intel, "cpu"}
|
||||||
case strings.HasPrefix(capStr, metal):
|
case strings.HasPrefix(capStr, metal):
|
||||||
return []string{"metal", "cpu"}
|
return []string{backendTokenMetal, "cpu"}
|
||||||
case strings.HasPrefix(capStr, darwinX86):
|
case strings.HasPrefix(capStr, darwinX86):
|
||||||
return []string{"darwin-x86", "cpu"}
|
return []string{"darwin-x86", "cpu"}
|
||||||
case strings.HasPrefix(capStr, vulkan):
|
case strings.HasPrefix(capStr, vulkan):
|
||||||
return []string{"vulkan", "cpu"}
|
return []string{vulkan, "cpu"}
|
||||||
default:
|
default:
|
||||||
return []string{"cpu"}
|
return []string{"cpu"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DetectedCapability returns the detected system capability string.
|
||||||
|
// This can be used by the UI to display what capability was detected.
|
||||||
|
func (s *SystemState) DetectedCapability() string {
|
||||||
|
return s.getSystemCapabilities()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsBackendCompatible checks if a backend (identified by name and URI) is compatible
|
||||||
|
// with the current system capability. This function uses getSystemCapabilities to ensure
|
||||||
|
// consistency with capability detection (including VRAM checks, environment overrides, etc.).
|
||||||
|
func (s *SystemState) IsBackendCompatible(name, uri string) bool {
|
||||||
|
combined := strings.ToLower(name + " " + uri)
|
||||||
|
capability := s.getSystemCapabilities()
|
||||||
|
|
||||||
|
// Check for darwin/macOS-specific backends (mlx, metal, darwin)
|
||||||
|
isDarwinBackend := strings.Contains(combined, backendTokenDarwin) ||
|
||||||
|
strings.Contains(combined, backendTokenMLX) ||
|
||||||
|
strings.Contains(combined, backendTokenMetal)
|
||||||
|
if isDarwinBackend {
|
||||||
|
// Darwin backends require the system to be running on darwin with metal or darwin-x86 capability
|
||||||
|
return capability == metal || capability == darwinX86
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for NVIDIA L4T-specific backends (arm64 Linux with NVIDIA GPU)
|
||||||
|
// This must be checked before the general NVIDIA check as L4T backends
|
||||||
|
// may also contain "cuda" or "nvidia" in their names
|
||||||
|
isL4TBackend := strings.Contains(combined, backendTokenL4T)
|
||||||
|
if isL4TBackend {
|
||||||
|
return strings.HasPrefix(capability, nvidiaL4T)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for NVIDIA/CUDA-specific backends (non-L4T)
|
||||||
|
isNvidiaBackend := strings.Contains(combined, backendTokenCUDA) ||
|
||||||
|
strings.Contains(combined, Nvidia)
|
||||||
|
if isNvidiaBackend {
|
||||||
|
// NVIDIA backends are compatible with nvidia, nvidia-cuda-12, nvidia-cuda-13, and l4t capabilities
|
||||||
|
return strings.HasPrefix(capability, Nvidia)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for AMD/ROCm-specific backends
|
||||||
|
isAMDBackend := strings.Contains(combined, backendTokenROCM) ||
|
||||||
|
strings.Contains(combined, backendTokenHIP) ||
|
||||||
|
strings.Contains(combined, AMD)
|
||||||
|
if isAMDBackend {
|
||||||
|
return capability == AMD
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for Intel/SYCL-specific backends
|
||||||
|
isIntelBackend := strings.Contains(combined, backendTokenSYCL) ||
|
||||||
|
strings.Contains(combined, Intel)
|
||||||
|
if isIntelBackend {
|
||||||
|
return capability == Intel
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPU backends are always compatible
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|||||||
191
swagger/docs.go
191
swagger/docs.go
@@ -1198,6 +1198,30 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/messages": {
|
||||||
|
"post": {
|
||||||
|
"summary": "Generate a message response for the given messages and model.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "query params",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/models": {
|
"/v1/models": {
|
||||||
"get": {
|
"get": {
|
||||||
"summary": "List and describe the various models available in the API.",
|
"summary": "List and describe the various models available in the API.",
|
||||||
@@ -1739,6 +1763,169 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.AnthropicContentBlock": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"is_error": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicImageSource"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"tool_use_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicImageSource": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"media_type": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicMessage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {},
|
||||||
|
"role": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"max_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"messages": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicMessage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_sequences": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"stream": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"system": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"tool_choice": {},
|
||||||
|
"tools": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_k": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicContentBlock"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"role": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_reason": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_sequence": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicUsage"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicTool": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicUsage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.BackendMonitorRequest": {
|
"schema.BackendMonitorRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -2229,6 +2416,10 @@ const docTemplate = `{
|
|||||||
"description": "The message name (used for tools calls)",
|
"description": "The message name (used for tools calls)",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"description": "Reasoning content extracted from \u003cthinking\u003e...\u003c/thinking\u003e tags",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"role": {
|
"role": {
|
||||||
"description": "The message role",
|
"description": "The message role",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
|||||||
@@ -1191,6 +1191,30 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/messages": {
|
||||||
|
"post": {
|
||||||
|
"summary": "Generate a message response for the given messages and model.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "query params",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/models": {
|
"/v1/models": {
|
||||||
"get": {
|
"get": {
|
||||||
"summary": "List and describe the various models available in the API.",
|
"summary": "List and describe the various models available in the API.",
|
||||||
@@ -1732,6 +1756,169 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.AnthropicContentBlock": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"is_error": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicImageSource"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"tool_use_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicImageSource": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"media_type": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicMessage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {},
|
||||||
|
"role": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"max_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"messages": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicMessage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_sequences": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"stream": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"system": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"tool_choice": {},
|
||||||
|
"tools": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_k": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicContentBlock"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"role": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_reason": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_sequence": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"$ref": "#/definitions/schema.AnthropicUsage"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicTool": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.AnthropicUsage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.BackendMonitorRequest": {
|
"schema.BackendMonitorRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -2222,6 +2409,10 @@
|
|||||||
"description": "The message name (used for tools calls)",
|
"description": "The message name (used for tools calls)",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"description": "Reasoning content extracted from \u003cthinking\u003e...\u003c/thinking\u003e tags",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"role": {
|
"role": {
|
||||||
"description": "The message role",
|
"description": "The message role",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
|||||||
@@ -239,6 +239,114 @@ definitions:
|
|||||||
start:
|
start:
|
||||||
type: number
|
type: number
|
||||||
type: object
|
type: object
|
||||||
|
schema.AnthropicContentBlock:
|
||||||
|
properties:
|
||||||
|
content: {}
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
input:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
is_error:
|
||||||
|
type: boolean
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
source:
|
||||||
|
$ref: '#/definitions/schema.AnthropicImageSource'
|
||||||
|
text:
|
||||||
|
type: string
|
||||||
|
tool_use_id:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.AnthropicImageSource:
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: string
|
||||||
|
media_type:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.AnthropicMessage:
|
||||||
|
properties:
|
||||||
|
content: {}
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.AnthropicRequest:
|
||||||
|
properties:
|
||||||
|
max_tokens:
|
||||||
|
type: integer
|
||||||
|
messages:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.AnthropicMessage'
|
||||||
|
type: array
|
||||||
|
metadata:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
stop_sequences:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
stream:
|
||||||
|
type: boolean
|
||||||
|
system:
|
||||||
|
type: string
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
tool_choice: {}
|
||||||
|
tools:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.AnthropicTool'
|
||||||
|
type: array
|
||||||
|
top_k:
|
||||||
|
type: integer
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
type: object
|
||||||
|
schema.AnthropicResponse:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.AnthropicContentBlock'
|
||||||
|
type: array
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
stop_reason:
|
||||||
|
type: string
|
||||||
|
stop_sequence:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
usage:
|
||||||
|
$ref: '#/definitions/schema.AnthropicUsage'
|
||||||
|
type: object
|
||||||
|
schema.AnthropicTool:
|
||||||
|
properties:
|
||||||
|
description:
|
||||||
|
type: string
|
||||||
|
input_schema:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.AnthropicUsage:
|
||||||
|
properties:
|
||||||
|
input_tokens:
|
||||||
|
type: integer
|
||||||
|
output_tokens:
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
schema.BackendMonitorRequest:
|
schema.BackendMonitorRequest:
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
@@ -573,6 +681,9 @@ definitions:
|
|||||||
name:
|
name:
|
||||||
description: The message name (used for tools calls)
|
description: The message name (used for tools calls)
|
||||||
type: string
|
type: string
|
||||||
|
reasoning:
|
||||||
|
description: Reasoning content extracted from <thinking>...</thinking> tags
|
||||||
|
type: string
|
||||||
role:
|
role:
|
||||||
description: The message role
|
description: The message role
|
||||||
type: string
|
type: string
|
||||||
@@ -1813,6 +1924,21 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/schema.OpenAIResponse'
|
$ref: '#/definitions/schema.OpenAIResponse'
|
||||||
summary: Stream MCP chat completions with reasoning, tool calls, and results
|
summary: Stream MCP chat completions with reasoning, tool calls, and results
|
||||||
|
/v1/messages:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: query params
|
||||||
|
in: body
|
||||||
|
name: request
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.AnthropicRequest'
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Response
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.AnthropicResponse'
|
||||||
|
summary: Generate a message response for the given messages and model.
|
||||||
/v1/models:
|
/v1/models:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|||||||
Reference in New Issue
Block a user