mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 11:42:57 -05:00
Compare commits
2 Commits
workaround
...
copilot/up
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1480707df | ||
|
|
6e1ecc9be7 |
@@ -16,7 +16,7 @@ RUN apt-get update && \
|
||||
|
||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||
FROM requirements AS requirements-drivers
|
||||
ARG VULKAN_FROM_SOURCE=false
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=0
|
||||
@@ -41,7 +41,7 @@ RUN <<EOT bash
|
||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils mesa-vulkan-drivers
|
||||
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
@@ -59,11 +59,6 @@ RUN <<EOT bash
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||
rm -rf /opt/vulkan-sdk
|
||||
elif [ "amd64" = "${TARGETARCH}}" ]; then
|
||||
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y vulkan-sdk
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
mkdir vulkan && cd vulkan && \
|
||||
|
||||
@@ -46,7 +46,7 @@ RUN <<EOT bash
|
||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
@@ -64,11 +64,6 @@ RUN <<EOT bash
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||
rm -rf /opt/vulkan-sdk
|
||||
elif [ "amd64" = "${TARGETARCH}}" ]; then
|
||||
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y vulkan-sdk
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
mkdir vulkan && cd vulkan && \
|
||||
|
||||
@@ -103,7 +103,7 @@ RUN <<EOT bash
|
||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
@@ -121,11 +121,6 @@ RUN <<EOT bash
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||
rm -rf /opt/vulkan-sdk
|
||||
elif [ "amd64" = "${TARGETARCH}}" ]; then
|
||||
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y vulkan-sdk
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
mkdir vulkan && cd vulkan && \
|
||||
|
||||
@@ -60,7 +60,7 @@ RUN <<EOT bash
|
||||
git python-is-python3 bison libx11-xcb-dev liblz4-dev libzstd-dev \
|
||||
ocaml-core ninja-build pkg-config libxml2-dev wayland-protocols python3-jsonschema \
|
||||
clang-format qtbase5-dev qt6-base-dev libxcb-glx0-dev sudo xz-utils
|
||||
if [ "amd64" = "$TARGETARCH" ] && [ "${VULKAN_FROM_SOURCE}" = "true" ]; then
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
wget "https://sdk.lunarg.com/sdk/download/1.4.328.1/linux/vulkansdk-linux-x86_64-1.4.328.1.tar.xz" && \
|
||||
tar -xf vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
rm vulkansdk-linux-x86_64-1.4.328.1.tar.xz && \
|
||||
@@ -78,11 +78,6 @@ RUN <<EOT bash
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/include/* /usr/include/ && \
|
||||
cp -rfv /opt/vulkan-sdk/1.4.328.1/x86_64/share/* /usr/share/ && \
|
||||
rm -rf /opt/vulkan-sdk
|
||||
elif [ "amd64" = "${TARGETARCH}}" ]; then
|
||||
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y vulkan-sdk
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
mkdir vulkan && cd vulkan && \
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=b1377188784f9aea26b8abde56d4aee8c733eec7
|
||||
LLAMA_VERSION?=593da7fa49503b68f9f01700be9f508f1e528992
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -17,9 +17,4 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
fi
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
|
||||
installRequirements
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
# Build dependencies needed for packages installed from source (e.g., git dependencies)
|
||||
# When using --no-build-isolation, these must be installed in the venv first
|
||||
wheel
|
||||
setuptools
|
||||
packaging
|
||||
@@ -16,10 +16,6 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
# Use python 3.12 for l4t
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
|
||||
@@ -16,8 +16,4 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
@@ -26,12 +26,6 @@ fi
|
||||
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --no-build-isolation"
|
||||
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
|
||||
git clone https://github.com/neuphonic/neutts-air neutts-air
|
||||
|
||||
cp -rfv neutts-air/neuttsair ./
|
||||
|
||||
@@ -23,10 +23,6 @@ if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
if [ "x${BUILD_PROFILE}" == "xl4t12" ]; then
|
||||
USE_PIP=true
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
git clone https://github.com/microsoft/VibeVoice.git
|
||||
|
||||
@@ -63,25 +63,6 @@ func (m *GalleryBackend) IsMeta() bool {
|
||||
return len(m.CapabilitiesMap) > 0 && m.URI == ""
|
||||
}
|
||||
|
||||
// IsCompatibleWith checks if the backend is compatible with the current system capability.
|
||||
// For meta backends, it checks if any of the capabilities in the map match the system capability.
|
||||
// For concrete backends, it delegates to SystemState.IsBackendCompatible.
|
||||
func (m *GalleryBackend) IsCompatibleWith(systemState *system.SystemState) bool {
|
||||
if systemState == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Meta backends are compatible if the system capability matches one of the keys
|
||||
if m.IsMeta() {
|
||||
capability := systemState.Capability(m.CapabilitiesMap)
|
||||
_, exists := m.CapabilitiesMap[capability]
|
||||
return exists
|
||||
}
|
||||
|
||||
// For concrete backends, delegate to the system package
|
||||
return systemState.IsBackendCompatible(m.Name, m.URI)
|
||||
}
|
||||
|
||||
func (m *GalleryBackend) SetInstalled(installed bool) {
|
||||
m.Installed = installed
|
||||
}
|
||||
|
||||
@@ -172,252 +172,6 @@ var _ = Describe("Gallery Backends", func() {
|
||||
Expect(nilMetaBackend.IsMeta()).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should check IsCompatibleWith correctly for meta backends", func() {
|
||||
metaBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "meta-backend",
|
||||
},
|
||||
CapabilitiesMap: map[string]string{
|
||||
"nvidia": "nvidia-backend",
|
||||
"amd": "amd-backend",
|
||||
"default": "default-backend",
|
||||
},
|
||||
}
|
||||
|
||||
// Test with nil state - should be compatible
|
||||
Expect(metaBackend.IsCompatibleWith(nil)).To(BeTrue())
|
||||
|
||||
// Test with NVIDIA system - should be compatible (has nvidia key)
|
||||
nvidiaState := &system.SystemState{GPUVendor: "nvidia", VRAM: 8 * 1024 * 1024 * 1024}
|
||||
Expect(metaBackend.IsCompatibleWith(nvidiaState)).To(BeTrue())
|
||||
|
||||
// Test with default (no GPU) - should be compatible (has default key)
|
||||
defaultState := &system.SystemState{}
|
||||
Expect(metaBackend.IsCompatibleWith(defaultState)).To(BeTrue())
|
||||
})
|
||||
|
||||
Describe("IsCompatibleWith for concrete backends", func() {
|
||||
Context("CPU backends", func() {
|
||||
It("should be compatible on all systems", func() {
|
||||
cpuBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "cpu-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp",
|
||||
}
|
||||
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
Expect(cpuBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
Context("Darwin/Metal backends", func() {
|
||||
When("running on darwin", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS != "darwin" {
|
||||
Skip("Skipping darwin-specific tests on non-darwin system")
|
||||
}
|
||||
})
|
||||
|
||||
It("should be compatible for MLX backend", func() {
|
||||
mlxBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "mlx",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx",
|
||||
}
|
||||
Expect(mlxBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should be compatible for metal-llama-cpp backend", func() {
|
||||
metalBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "metal-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp",
|
||||
}
|
||||
Expect(metalBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
|
||||
When("running on non-darwin", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS == "darwin" {
|
||||
Skip("Skipping non-darwin-specific tests on darwin system")
|
||||
}
|
||||
})
|
||||
|
||||
It("should NOT be compatible for MLX backend", func() {
|
||||
mlxBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "mlx",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx",
|
||||
}
|
||||
Expect(mlxBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should NOT be compatible for metal-llama-cpp backend", func() {
|
||||
metalBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "metal-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp",
|
||||
}
|
||||
Expect(metalBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Context("NVIDIA/CUDA backends", func() {
|
||||
When("running on non-darwin", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS == "darwin" {
|
||||
Skip("Skipping CUDA tests on darwin system")
|
||||
}
|
||||
})
|
||||
|
||||
It("should NOT be compatible without nvidia GPU", func() {
|
||||
cudaBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "cuda12-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp",
|
||||
}
|
||||
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should be compatible with nvidia GPU", func() {
|
||||
cudaBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "cuda12-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp",
|
||||
}
|
||||
Expect(cudaBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should be compatible with cuda13 backend on nvidia GPU", func() {
|
||||
cuda13Backend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "cuda13-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp",
|
||||
}
|
||||
Expect(cuda13Backend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Context("AMD/ROCm backends", func() {
|
||||
When("running on non-darwin", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS == "darwin" {
|
||||
Skip("Skipping AMD/ROCm tests on darwin system")
|
||||
}
|
||||
})
|
||||
|
||||
It("should NOT be compatible without AMD GPU", func() {
|
||||
rocmBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "rocm-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp",
|
||||
}
|
||||
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should be compatible with AMD GPU", func() {
|
||||
rocmBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "rocm-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp",
|
||||
}
|
||||
Expect(rocmBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should be compatible with hipblas backend on AMD GPU", func() {
|
||||
hipBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "hip-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-hip-llama-cpp",
|
||||
}
|
||||
Expect(hipBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.AMD, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Context("Intel/SYCL backends", func() {
|
||||
When("running on non-darwin", func() {
|
||||
BeforeEach(func() {
|
||||
if runtime.GOOS == "darwin" {
|
||||
Skip("Skipping Intel/SYCL tests on darwin system")
|
||||
}
|
||||
})
|
||||
|
||||
It("should NOT be compatible without Intel GPU", func() {
|
||||
intelBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "intel-sycl-f16-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp",
|
||||
}
|
||||
Expect(intelBackend.IsCompatibleWith(&system.SystemState{})).To(BeFalse())
|
||||
Expect(intelBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Nvidia, VRAM: 8 * 1024 * 1024 * 1024})).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should be compatible with Intel GPU", func() {
|
||||
intelBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "intel-sycl-f16-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp",
|
||||
}
|
||||
Expect(intelBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should be compatible with intel-sycl-f32 backend on Intel GPU", func() {
|
||||
intelF32Backend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "intel-sycl-f32-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp",
|
||||
}
|
||||
Expect(intelF32Backend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should be compatible with intel-transformers backend on Intel GPU", func() {
|
||||
intelTransformersBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "intel-transformers",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-intel-transformers",
|
||||
}
|
||||
Expect(intelTransformersBackend.IsCompatibleWith(&system.SystemState{GPUVendor: system.Intel, VRAM: 8 * 1024 * 1024 * 1024})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Context("Vulkan backends", func() {
|
||||
It("should be compatible on CPU-only systems", func() {
|
||||
// Vulkan backends don't have a specific GPU vendor requirement in the current logic
|
||||
// They are compatible if no other GPU-specific pattern matches
|
||||
vulkanBackend := &GalleryBackend{
|
||||
Metadata: Metadata{
|
||||
Name: "vulkan-llama-cpp",
|
||||
},
|
||||
URI: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp",
|
||||
}
|
||||
// Vulkan doesn't have vendor-specific filtering in current implementation
|
||||
Expect(vulkanBackend.IsCompatibleWith(&system.SystemState{})).To(BeTrue())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
It("should find best backend from meta based on system capabilities", func() {
|
||||
|
||||
metaBackend := &GalleryBackend{
|
||||
|
||||
@@ -226,16 +226,6 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
|
||||
|
||||
// List available backends
|
||||
func AvailableBackends(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
||||
return availableBackendsWithFilter(galleries, systemState, true)
|
||||
}
|
||||
|
||||
// AvailableBackendsUnfiltered returns all available backends without filtering by system capability.
|
||||
func AvailableBackendsUnfiltered(galleries []config.Gallery, systemState *system.SystemState) (GalleryElements[*GalleryBackend], error) {
|
||||
return availableBackendsWithFilter(galleries, systemState, false)
|
||||
}
|
||||
|
||||
// availableBackendsWithFilter is a helper function that lists available backends with optional filtering.
|
||||
func availableBackendsWithFilter(galleries []config.Gallery, systemState *system.SystemState, filterByCapability bool) (GalleryElements[*GalleryBackend], error) {
|
||||
var backends []*GalleryBackend
|
||||
|
||||
systemBackends, err := ListSystemBackends(systemState)
|
||||
@@ -251,17 +241,7 @@ func availableBackendsWithFilter(galleries []config.Gallery, systemState *system
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Filter backends by system capability if requested
|
||||
if filterByCapability {
|
||||
for _, backend := range galleryBackends {
|
||||
if backend.IsCompatibleWith(systemState) {
|
||||
backends = append(backends, backend)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
backends = append(backends, galleryBackends...)
|
||||
}
|
||||
backends = append(backends, galleryBackends...)
|
||||
}
|
||||
|
||||
return backends, nil
|
||||
|
||||
@@ -617,12 +617,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
installedBackendsCount = len(installedBackends)
|
||||
}
|
||||
|
||||
// Get the detected system capability
|
||||
detectedCapability := ""
|
||||
if appConfig.SystemState != nil {
|
||||
detectedCapability = appConfig.SystemState.DetectedCapability()
|
||||
}
|
||||
|
||||
return c.JSON(200, map[string]interface{}{
|
||||
"backends": backendsJSON,
|
||||
"repositories": appConfig.BackendGalleries,
|
||||
@@ -635,7 +629,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
"totalPages": totalPages,
|
||||
"prevPage": prevPage,
|
||||
"nextPage": nextPage,
|
||||
"systemCapability": detectedCapability,
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -54,11 +54,6 @@
|
||||
<span class="font-semibold text-cyan-300" x-text="installedBackends"></span>
|
||||
<span class="text-[#94A3B8] ml-1">installed</span>
|
||||
</a>
|
||||
<div class="flex items-center bg-[#101827] rounded-lg px-4 py-2 border border-[#38BDF8]/30">
|
||||
<i class="fas fa-microchip text-[#38BDF8] mr-2"></i>
|
||||
<span class="text-[#94A3B8] mr-1">Capability:</span>
|
||||
<span class="font-semibold text-[#38BDF8]" x-text="systemCapability"></span>
|
||||
</div>
|
||||
<a href="https://localai.io/backends/" target="_blank" class="btn-primary">
|
||||
<i class="fas fa-info-circle mr-2"></i>
|
||||
<span>Documentation</span>
|
||||
@@ -593,7 +588,6 @@ function backendsGallery() {
|
||||
totalPages: 1,
|
||||
availableBackends: 0,
|
||||
installedBackends: 0,
|
||||
systemCapability: '',
|
||||
selectedBackend: null,
|
||||
jobProgress: {},
|
||||
notifications: [],
|
||||
@@ -689,7 +683,6 @@ function backendsGallery() {
|
||||
this.totalPages = data.totalPages || 1;
|
||||
this.availableBackends = data.availableBackends || 0;
|
||||
this.installedBackends = data.installedBackends || 0;
|
||||
this.systemCapability = data.systemCapability || 'default';
|
||||
} catch (error) {
|
||||
console.error('Error fetching backends:', error);
|
||||
} finally {
|
||||
|
||||
@@ -29,3 +29,79 @@ helm show values go-skynet/local-ai > values.yaml
|
||||
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
|
||||
## Security Context Requirements
|
||||
|
||||
LocalAI spawns child processes to run model backends (e.g., llama.cpp, diffusers, whisper). To properly stop these processes and free resources like VRAM, LocalAI needs permission to send signals to its child processes.
|
||||
|
||||
If you're using restrictive security contexts, ensure the `CAP_KILL` capability is available:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: local-ai
|
||||
spec:
|
||||
containers:
|
||||
- name: local-ai
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
add:
|
||||
- KILL # Required for LocalAI to stop backend processes
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
```
|
||||
|
||||
Without the `KILL` capability, LocalAI cannot terminate backend processes when models are stopped, leading to:
|
||||
- VRAM and memory not being freed
|
||||
- Orphaned backend processes holding GPU resources
|
||||
- Error messages like `error while deleting process error=permission denied`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: VRAM is not freed when stopping models
|
||||
|
||||
**Symptoms:**
|
||||
- Models appear to stop but GPU memory remains allocated
|
||||
- Logs show `(deleteProcess) error while deleting process error=permission denied`
|
||||
- Backend processes remain running after model unload
|
||||
|
||||
**Common Causes:**
|
||||
- All capabilities are dropped without adding back `CAP_KILL`
|
||||
- Using user namespacing (`hostUsers: false`) with certain configurations
|
||||
- Overly restrictive seccomp profiles that block signal-related syscalls
|
||||
- Pod Security Policies or Pod Security Standards blocking required capabilities
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Add the `KILL` capability to your container's security context as shown in the example above.
|
||||
|
||||
2. If you're using a Helm chart, configure the security context in your `values.yaml`:
|
||||
|
||||
```yaml
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
add:
|
||||
- KILL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
```
|
||||
|
||||
3. Verify the capability is present in the running pod:
|
||||
|
||||
```bash
|
||||
kubectl exec -it <pod-name> -- grep CapEff /proc/1/status
|
||||
```
|
||||
|
||||
4. If running in privileged mode works but the above doesn't, check your cluster's Pod Security Policies or Pod Security Standards. You may need to adjust cluster-level policies to allow the `KILL` capability.
|
||||
|
||||
5. Ensure your seccomp profile (if custom) allows the `kill` syscall. The `RuntimeDefault` profile typically includes this.
|
||||
|
||||
@@ -29,3 +29,79 @@ helm show values go-skynet/local-ai > values.yaml
|
||||
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
|
||||
## Security Context Requirements
|
||||
|
||||
LocalAI spawns child processes to run model backends (e.g., llama.cpp, diffusers, whisper). To properly stop these processes and free resources like VRAM, LocalAI needs permission to send signals to its child processes.
|
||||
|
||||
If you're using restrictive security contexts, ensure the `CAP_KILL` capability is available:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: local-ai
|
||||
spec:
|
||||
containers:
|
||||
- name: local-ai
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
add:
|
||||
- KILL # Required for LocalAI to stop backend processes
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
```
|
||||
|
||||
Without the `KILL` capability, LocalAI cannot terminate backend processes when models are stopped, leading to:
|
||||
- VRAM and memory not being freed
|
||||
- Orphaned backend processes holding GPU resources
|
||||
- Error messages like `error while deleting process error=permission denied`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: VRAM is not freed when stopping models
|
||||
|
||||
**Symptoms:**
|
||||
- Models appear to stop but GPU memory remains allocated
|
||||
- Logs show `(deleteProcess) error while deleting process error=permission denied`
|
||||
- Backend processes remain running after model unload
|
||||
|
||||
**Common Causes:**
|
||||
- All capabilities are dropped without adding back `CAP_KILL`
|
||||
- Using user namespacing (`hostUsers: false`) with certain configurations
|
||||
- Overly restrictive seccomp profiles that block signal-related syscalls
|
||||
- Pod Security Policies or Pod Security Standards blocking required capabilities
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Add the `KILL` capability to your container's security context as shown in the example above.
|
||||
|
||||
2. If you're using a Helm chart, configure the security context in your `values.yaml`:
|
||||
|
||||
```yaml
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
add:
|
||||
- KILL
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
```
|
||||
|
||||
3. Verify the capability is present in the running pod:
|
||||
|
||||
```bash
|
||||
kubectl exec -it <pod-name> -- grep CapEff /proc/1/status
|
||||
```
|
||||
|
||||
4. If running in privileged mode works but the above doesn't, check your cluster's Pod Security Policies or Pod Security Standards. You may need to adjust cluster-level policies to allow the `KILL` capability.
|
||||
|
||||
5. Ensure your seccomp profile (if custom) allows the `kill` syscall. The `RuntimeDefault` profile typically includes this.
|
||||
|
||||
@@ -24,6 +24,8 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
return fmt.Errorf("model %s not found", s)
|
||||
}
|
||||
|
||||
defer delete(ml.models, s)
|
||||
|
||||
retries := 1
|
||||
for model.GRPC(false, ml.wd).IsBusy() {
|
||||
xlog.Debug("Model busy. Waiting.", "model", s)
|
||||
@@ -46,7 +48,6 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
if process == nil {
|
||||
xlog.Error("No process", "model", s)
|
||||
// Nothing to do as there is no process
|
||||
delete(ml.models, s)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -55,10 +56,6 @@ func (ml *ModelLoader) deleteProcess(s string) error {
|
||||
xlog.Error("(deleteProcess) error while deleting process", "error", err, "model", s)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
delete(ml.models, s)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -12,17 +12,15 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
// Public constants - used by tests and external packages
|
||||
Nvidia = "nvidia"
|
||||
AMD = "amd"
|
||||
Intel = "intel"
|
||||
|
||||
// Private constants - only used within this package
|
||||
defaultCapability = "default"
|
||||
nvidiaL4T = "nvidia-l4t"
|
||||
darwinX86 = "darwin-x86"
|
||||
metal = "metal"
|
||||
vulkan = "vulkan"
|
||||
nvidia = "nvidia"
|
||||
|
||||
amd = "amd"
|
||||
intel = "intel"
|
||||
vulkan = "vulkan"
|
||||
|
||||
nvidiaCuda13 = "nvidia-cuda-13"
|
||||
nvidiaCuda12 = "nvidia-cuda-12"
|
||||
@@ -32,16 +30,6 @@ const (
|
||||
capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
|
||||
capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
|
||||
defaultRunFile = "/run/localai/capability"
|
||||
|
||||
// Backend detection tokens (private)
|
||||
backendTokenDarwin = "darwin"
|
||||
backendTokenMLX = "mlx"
|
||||
backendTokenMetal = "metal"
|
||||
backendTokenL4T = "l4t"
|
||||
backendTokenCUDA = "cuda"
|
||||
backendTokenROCM = "rocm"
|
||||
backendTokenHIP = "hip"
|
||||
backendTokenSYCL = "sycl"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -108,7 +96,7 @@ func (s *SystemState) getSystemCapabilities() string {
|
||||
|
||||
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
|
||||
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
|
||||
if s.GPUVendor == Nvidia {
|
||||
if s.GPUVendor == nvidia {
|
||||
xlog.Info("Using nvidia-l4t capability (arm64 on linux)", "env", capabilityEnv)
|
||||
if cuda13DirExists {
|
||||
return nvidiaL4TCuda13
|
||||
@@ -143,6 +131,7 @@ func (s *SystemState) getSystemCapabilities() string {
|
||||
return s.GPUVendor
|
||||
}
|
||||
|
||||
|
||||
// BackendPreferenceTokens returns a list of substrings that represent the preferred
|
||||
// backend implementation order for the current system capability. Callers can use
|
||||
// these tokens to select the most appropriate concrete backend among multiple
|
||||
@@ -150,76 +139,19 @@ func (s *SystemState) getSystemCapabilities() string {
|
||||
func (s *SystemState) BackendPreferenceTokens() []string {
|
||||
capStr := strings.ToLower(s.getSystemCapabilities())
|
||||
switch {
|
||||
case strings.HasPrefix(capStr, Nvidia):
|
||||
return []string{backendTokenCUDA, vulkan, "cpu"}
|
||||
case strings.HasPrefix(capStr, AMD):
|
||||
return []string{backendTokenROCM, backendTokenHIP, vulkan, "cpu"}
|
||||
case strings.HasPrefix(capStr, Intel):
|
||||
return []string{backendTokenSYCL, Intel, "cpu"}
|
||||
case strings.HasPrefix(capStr, nvidia):
|
||||
return []string{"cuda", "vulkan", "cpu"}
|
||||
case strings.HasPrefix(capStr, amd):
|
||||
return []string{"rocm", "hip", "vulkan", "cpu"}
|
||||
case strings.HasPrefix(capStr, intel):
|
||||
return []string{"sycl", intel, "cpu"}
|
||||
case strings.HasPrefix(capStr, metal):
|
||||
return []string{backendTokenMetal, "cpu"}
|
||||
return []string{"metal", "cpu"}
|
||||
case strings.HasPrefix(capStr, darwinX86):
|
||||
return []string{"darwin-x86", "cpu"}
|
||||
case strings.HasPrefix(capStr, vulkan):
|
||||
return []string{vulkan, "cpu"}
|
||||
return []string{"vulkan", "cpu"}
|
||||
default:
|
||||
return []string{"cpu"}
|
||||
}
|
||||
}
|
||||
|
||||
// DetectedCapability returns the detected system capability string.
|
||||
// This can be used by the UI to display what capability was detected.
|
||||
func (s *SystemState) DetectedCapability() string {
|
||||
return s.getSystemCapabilities()
|
||||
}
|
||||
|
||||
// IsBackendCompatible checks if a backend (identified by name and URI) is compatible
|
||||
// with the current system capability. This function uses getSystemCapabilities to ensure
|
||||
// consistency with capability detection (including VRAM checks, environment overrides, etc.).
|
||||
func (s *SystemState) IsBackendCompatible(name, uri string) bool {
|
||||
combined := strings.ToLower(name + " " + uri)
|
||||
capability := s.getSystemCapabilities()
|
||||
|
||||
// Check for darwin/macOS-specific backends (mlx, metal, darwin)
|
||||
isDarwinBackend := strings.Contains(combined, backendTokenDarwin) ||
|
||||
strings.Contains(combined, backendTokenMLX) ||
|
||||
strings.Contains(combined, backendTokenMetal)
|
||||
if isDarwinBackend {
|
||||
// Darwin backends require the system to be running on darwin with metal or darwin-x86 capability
|
||||
return capability == metal || capability == darwinX86
|
||||
}
|
||||
|
||||
// Check for NVIDIA L4T-specific backends (arm64 Linux with NVIDIA GPU)
|
||||
// This must be checked before the general NVIDIA check as L4T backends
|
||||
// may also contain "cuda" or "nvidia" in their names
|
||||
isL4TBackend := strings.Contains(combined, backendTokenL4T)
|
||||
if isL4TBackend {
|
||||
return strings.HasPrefix(capability, nvidiaL4T)
|
||||
}
|
||||
|
||||
// Check for NVIDIA/CUDA-specific backends (non-L4T)
|
||||
isNvidiaBackend := strings.Contains(combined, backendTokenCUDA) ||
|
||||
strings.Contains(combined, Nvidia)
|
||||
if isNvidiaBackend {
|
||||
// NVIDIA backends are compatible with nvidia, nvidia-cuda-12, nvidia-cuda-13, and l4t capabilities
|
||||
return strings.HasPrefix(capability, Nvidia)
|
||||
}
|
||||
|
||||
// Check for AMD/ROCm-specific backends
|
||||
isAMDBackend := strings.Contains(combined, backendTokenROCM) ||
|
||||
strings.Contains(combined, backendTokenHIP) ||
|
||||
strings.Contains(combined, AMD)
|
||||
if isAMDBackend {
|
||||
return capability == AMD
|
||||
}
|
||||
|
||||
// Check for Intel/SYCL-specific backends
|
||||
isIntelBackend := strings.Contains(combined, backendTokenSYCL) ||
|
||||
strings.Contains(combined, Intel)
|
||||
if isIntelBackend {
|
||||
return capability == Intel
|
||||
}
|
||||
|
||||
// CPU backends are always compatible
|
||||
return true
|
||||
}
|
||||
|
||||
191
swagger/docs.go
191
swagger/docs.go
@@ -1198,30 +1198,6 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/messages": {
|
||||
"post": {
|
||||
"summary": "Generate a message response for the given messages and model.",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "query params",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.AnthropicRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.AnthropicResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/models": {
|
||||
"get": {
|
||||
"summary": "List and describe the various models available in the API.",
|
||||
@@ -1763,169 +1739,6 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicContentBlock": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"input": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"is_error": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"source": {
|
||||
"$ref": "#/definitions/schema.AnthropicImageSource"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"tool_use_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicImageSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "string"
|
||||
},
|
||||
"media_type": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {},
|
||||
"role": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"max_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicMessage"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_sequences": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"system": {
|
||||
"type": "string"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"tool_choice": {},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicTool"
|
||||
}
|
||||
},
|
||||
"top_k": {
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicContentBlock"
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_reason": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_sequence": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
},
|
||||
"usage": {
|
||||
"$ref": "#/definitions/schema.AnthropicUsage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicTool": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.BackendMonitorRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -2416,10 +2229,6 @@ const docTemplate = `{
|
||||
"description": "The message name (used for tools calls)",
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"description": "Reasoning content extracted from \u003cthinking\u003e...\u003c/thinking\u003e tags",
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"description": "The message role",
|
||||
"type": "string"
|
||||
|
||||
@@ -1191,30 +1191,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/messages": {
|
||||
"post": {
|
||||
"summary": "Generate a message response for the given messages and model.",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "query params",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.AnthropicRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.AnthropicResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/models": {
|
||||
"get": {
|
||||
"summary": "List and describe the various models available in the API.",
|
||||
@@ -1756,169 +1732,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicContentBlock": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"input": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"is_error": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"source": {
|
||||
"$ref": "#/definitions/schema.AnthropicImageSource"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"tool_use_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicImageSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "string"
|
||||
},
|
||||
"media_type": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {},
|
||||
"role": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"max_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicMessage"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_sequences": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"system": {
|
||||
"type": "string"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"tool_choice": {},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicTool"
|
||||
}
|
||||
},
|
||||
"top_k": {
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.AnthropicContentBlock"
|
||||
}
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_reason": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_sequence": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
},
|
||||
"usage": {
|
||||
"$ref": "#/definitions/schema.AnthropicUsage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicTool": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.AnthropicUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.BackendMonitorRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -2409,10 +2222,6 @@
|
||||
"description": "The message name (used for tools calls)",
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"description": "Reasoning content extracted from \u003cthinking\u003e...\u003c/thinking\u003e tags",
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"description": "The message role",
|
||||
"type": "string"
|
||||
|
||||
@@ -239,114 +239,6 @@ definitions:
|
||||
start:
|
||||
type: number
|
||||
type: object
|
||||
schema.AnthropicContentBlock:
|
||||
properties:
|
||||
content: {}
|
||||
id:
|
||||
type: string
|
||||
input:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
is_error:
|
||||
type: boolean
|
||||
name:
|
||||
type: string
|
||||
source:
|
||||
$ref: '#/definitions/schema.AnthropicImageSource'
|
||||
text:
|
||||
type: string
|
||||
tool_use_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
type: object
|
||||
schema.AnthropicImageSource:
|
||||
properties:
|
||||
data:
|
||||
type: string
|
||||
media_type:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
type: object
|
||||
schema.AnthropicMessage:
|
||||
properties:
|
||||
content: {}
|
||||
role:
|
||||
type: string
|
||||
type: object
|
||||
schema.AnthropicRequest:
|
||||
properties:
|
||||
max_tokens:
|
||||
type: integer
|
||||
messages:
|
||||
items:
|
||||
$ref: '#/definitions/schema.AnthropicMessage'
|
||||
type: array
|
||||
metadata:
|
||||
additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
model:
|
||||
type: string
|
||||
stop_sequences:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
stream:
|
||||
type: boolean
|
||||
system:
|
||||
type: string
|
||||
temperature:
|
||||
type: number
|
||||
tool_choice: {}
|
||||
tools:
|
||||
items:
|
||||
$ref: '#/definitions/schema.AnthropicTool'
|
||||
type: array
|
||||
top_k:
|
||||
type: integer
|
||||
top_p:
|
||||
type: number
|
||||
type: object
|
||||
schema.AnthropicResponse:
|
||||
properties:
|
||||
content:
|
||||
items:
|
||||
$ref: '#/definitions/schema.AnthropicContentBlock'
|
||||
type: array
|
||||
id:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
role:
|
||||
type: string
|
||||
stop_reason:
|
||||
type: string
|
||||
stop_sequence:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
usage:
|
||||
$ref: '#/definitions/schema.AnthropicUsage'
|
||||
type: object
|
||||
schema.AnthropicTool:
|
||||
properties:
|
||||
description:
|
||||
type: string
|
||||
input_schema:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
schema.AnthropicUsage:
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
output_tokens:
|
||||
type: integer
|
||||
type: object
|
||||
schema.BackendMonitorRequest:
|
||||
properties:
|
||||
model:
|
||||
@@ -681,9 +573,6 @@ definitions:
|
||||
name:
|
||||
description: The message name (used for tools calls)
|
||||
type: string
|
||||
reasoning:
|
||||
description: Reasoning content extracted from <thinking>...</thinking> tags
|
||||
type: string
|
||||
role:
|
||||
description: The message role
|
||||
type: string
|
||||
@@ -1924,21 +1813,6 @@ paths:
|
||||
schema:
|
||||
$ref: '#/definitions/schema.OpenAIResponse'
|
||||
summary: Stream MCP chat completions with reasoning, tool calls, and results
|
||||
/v1/messages:
|
||||
post:
|
||||
parameters:
|
||||
- description: query params
|
||||
in: body
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/schema.AnthropicRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/schema.AnthropicResponse'
|
||||
summary: Generate a message response for the given messages and model.
|
||||
/v1/models:
|
||||
get:
|
||||
responses:
|
||||
|
||||
Reference in New Issue
Block a user