Compare commits


1 commit

Author: Ettore Di Giacinto
SHA1: 3f52776a1c
Message: WIP
Date: 2025-07-23 21:18:47 +02:00
106 changed files with 3417 additions and 3065 deletions


@@ -381,12 +381,24 @@ jobs:
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
# sycl builds # sycl builds
- build-type: 'intel' - build-type: 'sycl_f32'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-rerankers' tag-suffix: '-gpu-intel-sycl-f32-rerankers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "rerankers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-rerankers'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
@@ -417,36 +429,60 @@ jobs:
backend: "llama-cpp" backend: "llama-cpp"
dockerfile: "./backend/Dockerfile.llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp"
context: "./" context: "./"
- build-type: 'intel' - build-type: 'sycl_f32'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-vllm' tag-suffix: '-gpu-intel-sycl-f32-vllm'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "vllm" backend: "vllm"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
- build-type: 'intel' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-transformers' tag-suffix: '-gpu-intel-sycl-f16-vllm'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "vllm"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-transformers'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "transformers" backend: "transformers"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
- build-type: 'intel' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-diffusers' tag-suffix: '-gpu-intel-sycl-f16-transformers'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "transformers"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-diffusers'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
@@ -454,48 +490,96 @@ jobs:
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
# SYCL additional backends # SYCL additional backends
- build-type: 'intel' - build-type: 'sycl_f32'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-kokoro' tag-suffix: '-gpu-intel-sycl-f32-kokoro'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "kokoro" backend: "kokoro"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
- build-type: 'intel' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-faster-whisper' tag-suffix: '-gpu-intel-sycl-f16-kokoro'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "kokoro"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "faster-whisper" backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
- build-type: 'intel' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-coqui' tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "faster-whisper"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-coqui'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "coqui" backend: "coqui"
dockerfile: "./backend/Dockerfile.python" dockerfile: "./backend/Dockerfile.python"
context: "./backend" context: "./backend"
- build-type: 'intel' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
cuda-minor-version: "" cuda-minor-version: ""
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-intel-bark' tag-suffix: '-gpu-intel-sycl-f16-coqui'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "coqui"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f32'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f32-bark'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "bark"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'sycl_f16'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-sycl-f16-bark'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
@@ -513,7 +597,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "piper" backend: "piper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
# bark-cpp # bark-cpp
- build-type: '' - build-type: ''
@@ -526,7 +610,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "bark-cpp" backend: "bark-cpp"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: '' - build-type: ''
cuda-major-version: "" cuda-major-version: ""
@@ -575,7 +659,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
@@ -587,7 +671,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
@@ -599,7 +683,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'sycl_f32' - build-type: 'sycl_f32'
cuda-major-version: "" cuda-major-version: ""
@@ -611,7 +695,7 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
@@ -623,7 +707,7 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'vulkan' - build-type: 'vulkan'
cuda-major-version: "" cuda-major-version: ""
@@ -635,7 +719,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
@@ -647,7 +731,7 @@ jobs:
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm' runs-on: 'ubuntu-24.04-arm'
backend: "stablediffusion-ggml" backend: "stablediffusion-ggml"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
# whisper # whisper
- build-type: '' - build-type: ''
@@ -660,7 +744,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
@@ -672,7 +756,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
@@ -684,7 +768,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'sycl_f32' - build-type: 'sycl_f32'
cuda-major-version: "" cuda-major-version: ""
@@ -696,7 +780,7 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
cuda-major-version: "" cuda-major-version: ""
@@ -708,7 +792,7 @@ jobs:
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'vulkan' - build-type: 'vulkan'
cuda-major-version: "" cuda-major-version: ""
@@ -720,7 +804,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
@@ -732,7 +816,7 @@ jobs:
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm' runs-on: 'ubuntu-24.04-arm'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
- build-type: 'hipblas' - build-type: 'hipblas'
cuda-major-version: "" cuda-major-version: ""
@@ -744,7 +828,7 @@ jobs:
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
skip-drivers: 'false' skip-drivers: 'false'
backend: "whisper" backend: "whisper"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
#silero-vad #silero-vad
- build-type: '' - build-type: ''
@@ -757,7 +841,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "silero-vad" backend: "silero-vad"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
# local-store # local-store
- build-type: '' - build-type: ''
@@ -770,7 +854,7 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "local-store" backend: "local-store"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
# huggingface # huggingface
- build-type: '' - build-type: ''
@@ -783,143 +867,8 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
backend: "huggingface" backend: "huggingface"
dockerfile: "./backend/Dockerfile.golang" dockerfile: "./backend/Dockerfile.go"
context: "./" context: "./"
# rfdetr
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
tag-suffix: '-cpu-rfdetr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "rfdetr"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "rfdetr"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-rfdetr'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "rfdetr"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'intel'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-rfdetr'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "rfdetr"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/arm64'
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-nvidia-l4t-arm64-rfdetr'
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
runs-on: 'ubuntu-24.04-arm'
backend: "rfdetr"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# exllama2
- build-type: ''
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cpu-exllama2'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11-exllama2'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'intel'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-gpu-intel-exllama2'
runs-on: 'ubuntu-latest'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
skip-drivers: 'false'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
- build-type: 'hipblas'
cuda-major-version: ""
cuda-minor-version: ""
platforms: 'linux/amd64'
skip-drivers: 'true'
tag-latest: 'auto'
tag-suffix: '-gpu-hipblas-exllama2'
base-image: "rocm/dev-ubuntu-22.04:6.1"
runs-on: 'ubuntu-latest'
backend: "exllama2"
dockerfile: "./backend/Dockerfile.python"
context: "./backend"
# runs out of space on the runner
# - build-type: 'hipblas'
# cuda-major-version: ""
# cuda-minor-version: ""
# platforms: 'linux/amd64'
# tag-latest: 'auto'
# tag-suffix: '-gpu-hipblas-rfdetr'
# base-image: "rocm/dev-ubuntu-22.04:6.1"
# runs-on: 'ubuntu-latest'
# skip-drivers: 'false'
# backend: "rfdetr"
# dockerfile: "./backend/Dockerfile.python"
# context: "./backend"
llama-cpp-darwin: llama-cpp-darwin:
runs-on: macOS-14 runs-on: macOS-14
strategy: strategy:
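
Note: the hunks above split each former 'intel' matrix entry into explicit 'sycl_f32' and 'sycl_f16' entries (with matching `-gpu-intel-sycl-*` tag suffixes) and switch the Go-based backends from Dockerfile.golang to Dockerfile.go. As a rough local equivalent of what one such SYCL entry builds, assuming the Python backend Dockerfile accepts the same build arguments the matrix passes (the image tag here is illustrative, not taken from the workflow):

```bash
# Hedged sketch: build one per-precision SYCL backend image locally, mirroring the
# matrix fields above (build-type, base-image, backend, dockerfile).
docker build \
  --build-arg BUILD_TYPE=sycl_f16 \
  --build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \
  --build-arg BACKEND=rerankers \
  -t local-ai-backend:rerankers-sycl-f16 \
  -f backend/Dockerfile.python .
```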


@@ -21,7 +21,7 @@ jobs:
variable: "BARKCPP_VERSION" variable: "BARKCPP_VERSION"
branch: "main" branch: "main"
file: "Makefile" file: "Makefile"
- repository: "leejet/stable-diffusion.cpp" - repository: "richiejp/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION" variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master" branch: "master"
file: "backend/go/stablediffusion-ggml/Makefile" file: "backend/go/stablediffusion-ggml/Makefile"


@@ -39,7 +39,7 @@ jobs:
cuda-minor-version: "0" cuda-minor-version: "0"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-gpu-nvidia-cuda-12' tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
@@ -51,12 +51,12 @@ jobs:
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: 'sycl' tag-suffix: 'sycl-f16'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'vulkan' - build-type: 'vulkan'


@@ -83,7 +83,7 @@ jobs:
cuda-minor-version: "7" cuda-minor-version: "7"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-11' tag-suffix: '-gpu-nvidia-cuda11'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
@@ -94,7 +94,7 @@ jobs:
cuda-minor-version: "0" cuda-minor-version: "0"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-nvidia-cuda-12' tag-suffix: '-gpu-nvidia-cuda12'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
@@ -103,21 +103,30 @@ jobs:
- build-type: 'vulkan' - build-type: 'vulkan'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-gpu-vulkan' tag-suffix: '-vulkan'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false' skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-vulkan" aio: "-aio-gpu-vulkan"
- build-type: 'sycl' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest" base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04" grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel' tag-suffix: '-gpu-intel-f16'
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel" aio: "-aio-gpu-intel-f16"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-suffix: '-gpu-intel-f32'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel-f32"
gh-runner: gh-runner:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml

.gitignore

@@ -12,7 +12,6 @@ prepare-sources
/backends /backends
/backend-images /backend-images
/result.yaml /result.yaml
protoc
*.log *.log


@@ -72,12 +72,6 @@ RUN <<EOT bash
fi fi
EOT EOT
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
echo "nvidia-l4t" > /run/localai/capability
fi
EOT
# If we are building with clblas support, we need the libraries for the builds # If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \ apt-get update && \
@@ -100,11 +94,6 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \ ldconfig \
; fi ; fi
RUN expr "${BUILD_TYPE}" : sycl && \
echo "intel" > /run/localai/capability || \
echo "Not Intel"
# Cuda # Cuda
ENV PATH=/usr/local/cuda/bin:${PATH} ENV PATH=/usr/local/cuda/bin:${PATH}

Makefile

@@ -5,6 +5,8 @@ BINARY_NAME=local-ai
GORELEASER?= GORELEASER?=
ONEAPI_VERSION?=2025.2
export BUILD_TYPE?= export BUILD_TYPE?=
GO_TAGS?= GO_TAGS?=
@@ -143,7 +145,7 @@ backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-sta
backends/whisper: docker-build-whisper docker-save-whisper build backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)" ./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)" ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
@@ -153,9 +155,6 @@ backends/local-store: docker-build-local-store docker-save-local-store build
backends/huggingface: docker-build-huggingface docker-save-huggingface build backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)" ./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
######################################################## ########################################################
## AIO tests ## AIO tests
######################################################## ########################################################
@@ -243,7 +242,10 @@ help: ## Show this help.
######################################################## ########################################################
.PHONY: protogen .PHONY: protogen
protogen: protogen-go protogen: protogen-go protogen-python
.PHONY: protogen-clean
protogen-clean: protogen-go-clean protogen-python-clean
protoc: protoc:
@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \ @OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
@@ -288,6 +290,93 @@ protogen-go-clean:
$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go $(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
$(RM) bin/* $(RM) bin/*
.PHONY: protogen-python
protogen-python: bark-protogen coqui-protogen chatterbox-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
.PHONY: protogen-python-clean
protogen-python-clean: bark-protogen-clean coqui-protogen-clean chatterbox-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
.PHONY: bark-protogen
bark-protogen:
$(MAKE) -C backend/python/bark protogen
.PHONY: bark-protogen-clean
bark-protogen-clean:
$(MAKE) -C backend/python/bark protogen-clean
.PHONY: coqui-protogen
coqui-protogen:
$(MAKE) -C backend/python/coqui protogen
.PHONY: coqui-protogen-clean
coqui-protogen-clean:
$(MAKE) -C backend/python/coqui protogen-clean
.PHONY: diffusers-protogen
diffusers-protogen:
$(MAKE) -C backend/python/diffusers protogen
.PHONY: chatterbox-protogen
chatterbox-protogen:
$(MAKE) -C backend/python/chatterbox protogen
.PHONY: diffusers-protogen-clean
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: chatterbox-protogen-clean
chatterbox-protogen-clean:
$(MAKE) -C backend/python/chatterbox protogen-clean
.PHONY: faster-whisper-protogen
faster-whisper-protogen:
$(MAKE) -C backend/python/faster-whisper protogen
.PHONY: faster-whisper-protogen-clean
faster-whisper-protogen-clean:
$(MAKE) -C backend/python/faster-whisper protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
.PHONY: exllama2-protogen-clean
exllama2-protogen-clean:
$(MAKE) -C backend/python/exllama2 protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
.PHONY: rerankers-protogen-clean
rerankers-protogen-clean:
$(MAKE) -C backend/python/rerankers protogen-clean
.PHONY: transformers-protogen
transformers-protogen:
$(MAKE) -C backend/python/transformers protogen
.PHONY: transformers-protogen-clean
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
.PHONY: kokoro-protogen
kokoro-protogen:
$(MAKE) -C backend/python/kokoro protogen
.PHONY: kokoro-protogen-clean
kokoro-protogen-clean:
$(MAKE) -C backend/python/kokoro protogen-clean
.PHONY: vllm-protogen
vllm-protogen:
$(MAKE) -C backend/python/vllm protogen
.PHONY: vllm-protogen-clean
vllm-protogen-clean:
$(MAKE) -C backend/python/vllm protogen-clean
prepare-test-extra: protogen-python prepare-test-extra: protogen-python
$(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers $(MAKE) -C backend/python/diffusers
@@ -323,7 +412,7 @@ docker-cuda11:
--build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE)-cuda-11 . -t $(DOCKER_IMAGE)-cuda11 .
docker-aio: docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
@@ -338,11 +427,19 @@ docker-aio-all:
docker-image-intel: docker-image-intel:
docker build \ docker build \
--build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \ --build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu24.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl -t $(DOCKER_IMAGE) . --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:${ONEAPI_VERSION}.0-0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
######################################################## ########################################################
## Backends ## Backends
@@ -352,25 +449,19 @@ backend-images:
mkdir -p backend-images mkdir -p backend-images
docker-build-llama-cpp: docker-build-llama-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp . docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg IMAGE_BASE=$(IMAGE_BASE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
docker-build-bark-cpp: docker-build-bark-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
docker-build-piper: docker-build-piper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper . docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
docker-build-local-store: docker-build-local-store:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store . docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
docker-build-huggingface: docker-build-huggingface:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
docker-build-rfdetr:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr ./backend
docker-save-rfdetr: backend-images
docker save local-ai-backend:rfdetr -o backend-images/rfdetr.tar
docker-save-huggingface: backend-images docker-save-huggingface: backend-images
docker save local-ai-backend:huggingface -o backend-images/huggingface.tar docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
@@ -379,7 +470,7 @@ docker-save-local-store: backend-images
docker save local-ai-backend:local-store -o backend-images/local-store.tar docker save local-ai-backend:local-store -o backend-images/local-store.tar
docker-build-silero-vad: docker-build-silero-vad:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad . docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
docker-save-silero-vad: backend-images docker-save-silero-vad: backend-images
docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
@@ -394,46 +485,46 @@ docker-save-bark-cpp: backend-images
docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
docker-build-stablediffusion-ggml: docker-build-stablediffusion-ggml:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml . docker build -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.go --build-arg BACKEND=stablediffusion-ggml .
docker-save-stablediffusion-ggml: backend-images docker-save-stablediffusion-ggml: backend-images
docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
docker-build-rerankers: docker-build-rerankers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers . docker build -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
docker-build-vllm: docker-build-vllm:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm . docker build -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
docker-build-transformers: docker-build-transformers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . docker build -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
docker-build-diffusers: docker-build-diffusers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . docker build -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
docker-build-kokoro: docker-build-kokoro:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
docker-build-whisper: docker-build-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper . docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper .
docker-save-whisper: backend-images docker-save-whisper: backend-images
docker save local-ai-backend:whisper -o backend-images/whisper.tar docker save local-ai-backend:whisper -o backend-images/whisper.tar
docker-build-faster-whisper: docker-build-faster-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
docker-build-coqui: docker-build-coqui:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . docker build -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
docker-build-bark: docker-build-bark:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . docker build -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
docker-build-chatterbox: docker-build-chatterbox:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . docker build -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
docker-build-exllama2: docker-build-exllama2:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . docker build -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2 docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
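
The new protogen-python aggregate added earlier in this Makefile simply fans out to each Python backend's own protogen target; a hedged shell equivalent (backend list copied from that target) is:

```bash
# What `make protogen-python` amounts to: run protogen in every Python backend directory.
for b in bark coqui chatterbox diffusers exllama2 rerankers transformers kokoro vllm faster-whisper; do
  make -C "backend/python/${b}" protogen
done
```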


@@ -140,7 +140,11 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
### Intel GPU Images (oneAPI): ### Intel GPU Images (oneAPI):
```bash ```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel # Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
``` ```
### Vulkan GPU Images: ### Vulkan GPU Images:
@@ -162,7 +166,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
# Intel GPU version # Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
# AMD GPU version # AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
@@ -185,14 +189,10 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest local-ai run oci://localai/phi-2:latest
``` ```
> ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html) For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news ## 📰 Latest project news
- July/August 2025: 🔍 [Object Detection](https://localai.io/features/object-detection/) added to the API featuring [rf-detr](https://github.com/roboflow/rf-detr)
- July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607). - June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery). - May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0) - May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
@@ -225,7 +225,6 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 🔍 [Object Detection](https://localai.io/features/object-detection/)
- 📈 [Reranker API](https://localai.io/features/reranker/) - 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- [Agentic capabilities](https://github.com/mudler/LocalAGI) - [Agentic capabilities](https://github.com/mudler/LocalAGI)


@@ -96,6 +96,17 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
ldconfig \ ldconfig \
; fi ; fi
# Intel oneAPI requirements
RUN <<EOT bash
if [[ "${BUILD_TYPE}" == sycl* ]] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
intel-oneapi-runtime-libs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go # Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
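
The guard added above uses a bash glob so that a single branch covers both of the new SYCL build types; a quick standalone check of that pattern match (values are illustrative):

```bash
# The `sycl*` glob matches both sycl_f16 and sycl_f32, but not other build types.
for BUILD_TYPE in sycl_f16 sycl_f32 cublas; do
  if [[ "${BUILD_TYPE}" == sycl* ]]; then
    echo "${BUILD_TYPE} -> install Intel oneAPI runtime libs"
  fi
done
```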


@@ -11,6 +11,7 @@ ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0 ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4 ARG CMAKE_VERSION=3.26.4
ARG PROTOBUF_VERSION=v21.12
ENV MAKEFLAGS=${GRPC_MAKEFLAGS} ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -49,6 +50,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
make install && \ make install && \
rm -rf /build rm -rf /build
RUN git clone --recurse-submodules --branch ${PROTOBUF_VERSION} https://github.com/protocolbuffers/protobuf.git && \
mkdir -p /build/protobuf/build && \
cd /build/protobuf/build && \
cmake -Dprotobuf_BUILD_SHARED_LIBS=ON -Dprotobuf_BUILD_TESTS=OFF .. && \
make && \
make install && \
rm -rf /build
FROM ${BASE_IMAGE} AS builder FROM ${BASE_IMAGE} AS builder
ARG BACKEND=rerankers ARG BACKEND=rerankers
ARG BUILD_TYPE ARG BUILD_TYPE
@@ -180,21 +189,9 @@ COPY --from=grpc /opt/grpc /usr/local
COPY . /LocalAI COPY . /LocalAI
## Otherwise just run the normal build RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp
RUN <<EOT bash RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-grpc
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-rpc-server
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
make llama-cpp-grpc && make llama-cpp-rpc-server; \
else \
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
make llama-cpp-avx2 && \
make llama-cpp-avx512 && \
make llama-cpp-fallback && \
make llama-cpp-grpc && \
make llama-cpp-rpc-server; \
fi
EOT
# Copy libraries using a script to handle architecture differences # Copy libraries using a script to handle architecture differences
RUN make -C /LocalAI/backend/cpp/llama-cpp package RUN make -C /LocalAI/backend/cpp/llama-cpp package


@@ -20,7 +20,6 @@ service Backend {
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {} rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {} rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {} rpc Status(HealthMessage) returns (StatusResponse) {}
rpc Detect(DetectOptions) returns (DetectResponse) {}
rpc StoresSet(StoresSetOptions) returns (Result) {} rpc StoresSet(StoresSetOptions) returns (Result) {}
rpc StoresDelete(StoresDeleteOptions) returns (Result) {} rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
@@ -305,9 +304,6 @@ message GenerateImageRequest {
// Diffusers // Diffusers
string EnableParameters = 10; string EnableParameters = 10;
int32 CLIPSkip = 11; int32 CLIPSkip = 11;
// Reference images for models that support them (e.g., Flux Kontext)
repeated string ref_images = 12;
} }
message GenerateVideoRequest { message GenerateVideoRequest {
@@ -380,20 +376,3 @@ message Message {
string role = 1; string role = 1;
string content = 2; string content = 2;
} }
message DetectOptions {
string src = 1;
}
message Detection {
float x = 1;
float y = 2;
float width = 3;
float height = 4;
float confidence = 5;
string class_name = 6;
}
message DetectResponse {
repeated Detection Detections = 1;
}


@@ -17,6 +17,8 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include") include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
endif() endif()
set(Protobuf_USE_STATIC_LIBS OFF)
set(gRPC_USE_STATIC_LIBS OFF)
find_package(absl CONFIG REQUIRED) find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED) find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED) find_package(gRPC CONFIG REQUIRED)


@@ -1,5 +1,5 @@
LLAMA_VERSION?=daf2dd788066b8b239cb7f68210e090c2124c199 LLAMA_VERSION?=acd6cb1c41676f6bbb25c2a76fa5abeb1719301e
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?= CMAKE_ARGS?=
@@ -7,10 +7,9 @@ BUILD_TYPE?=
NATIVE?=false NATIVE?=false
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server TARGET?=--target grpc-server
JOBS?=$(shell nproc)
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF CMAKE_ARGS+=-DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
ifeq ($(NATIVE),false) ifeq ($(NATIVE),false)
@@ -26,7 +25,7 @@ else ifeq ($(BUILD_TYPE),openblas)
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas) else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas) else ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
@@ -90,33 +89,12 @@ else
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
endif endif
llama-cpp-avx2: llama.cpp llama-cpp: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET}) $(info ${GREEN}I llama-cpp build info:${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS)" $(MAKE) VARIANT="llama-cpp-build" build-llama-cpp-grpc-server
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2 cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build/grpc-server llama-cpp
llama-cpp-avx512: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
llama-cpp-avx: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
llama-cpp-fallback: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
llama-cpp-grpc: llama.cpp llama-cpp-grpc: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
@@ -161,8 +139,8 @@ grpc-server: llama.cpp llama.cpp/tools/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" @echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \ +bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET)" cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
else else
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET) +cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
endif endif
cp llama.cpp/build/bin/grpc-server . cp llama.cpp/build/bin/grpc-server .
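
The per-CPU-variant targets (avx, avx2, avx512, fallback) above are collapsed into a single `llama-cpp` build; judging from the new CMAKE_ARGS, CPU dispatch is left to ggml's dynamically loaded backend variants. A hedged sketch of the configure/build step the new target effectively runs (flags copied from the diff, directory layout illustrative):

```bash
# Single-variant grpc-server build with runtime CPU backend selection.
cd llama.cpp && mkdir -p build && cd build
cmake .. -DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF \
         -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
cmake --build . --config Release --target grpc-server
```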


@@ -6,34 +6,9 @@ CURDIR=$(dirname "$(realpath $0)")
cd / cd /
echo "CPU info:" BINARY=llama-cpp
grep -e "model\sname" /proc/cpuinfo | head -1
grep -e "flags" /proc/cpuinfo | head -1
BINARY=llama-cpp-fallback
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/llama-cpp-avx ]; then
BINARY=llama-cpp-avx
fi
fi
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
echo "CPU: AVX2 found OK"
if [ -e $CURDIR/llama-cpp-avx2 ]; then
BINARY=llama-cpp-avx2
fi
fi
# Check avx 512
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
echo "CPU: AVX512F found OK"
if [ -e $CURDIR/llama-cpp-avx512 ]; then
BINARY=llama-cpp-avx512
fi
fi
## P2P/GRPC mode
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
if [ -e $CURDIR/llama-cpp-grpc ]; then if [ -e $CURDIR/llama-cpp-grpc ]; then
BINARY=llama-cpp-grpc BINARY=llama-cpp-grpc
@@ -56,6 +31,3 @@ fi
echo "Using binary: $BINARY" echo "Using binary: $BINARY"
exec $CURDIR/$BINARY "$@" exec $CURDIR/$BINARY "$@"
# In case we fail execing, just run fallback
exec $CURDIR/llama-cpp-fallback "$@"


@@ -18,8 +18,8 @@ GO_TAGS?=
LD_FLAGS?= LD_FLAGS?=
# stablediffusion.cpp (ggml) # stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=f6b9aa1a4373e322ff12c15b8a0749e6dd6f0253 STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -91,18 +91,23 @@ endif
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive) # (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
GGML_ARCHIVE_DIR := build/ggml/src/ GGML_ARCHIVE_DIR := build/ggml/src/
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a') ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
ALL_OBJS := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.o')
# Name of the single merged library # Name of the single merged library
COMBINED_LIB := libggmlall.a COMBINED_LIB := libggmlall.a
# Instead of using the archives generated by GGML, use the object files directly to avoid overwriting objects with the same base name # Rule to merge all the .a files into one
$(COMBINED_LIB): $(ALL_ARCHIVES) $(COMBINED_LIB): $(ALL_ARCHIVES)
@echo "Merging all .o into $(COMBINED_LIB): $(ALL_OBJS)" @echo "Merging all .a into $(COMBINED_LIB)"
rm -f $@ rm -f $@
ar -qc $@ $(ALL_OBJS) mkdir -p merge-tmp
for a in $(ALL_ARCHIVES); do \
( cd merge-tmp && ar x ../$$a ); \
done
( cd merge-tmp && ar rcs ../$@ *.o )
# Ensure we have a proper index # Ensure we have a proper index
ranlib $@ ranlib $@
# Clean up
rm -rf merge-tmp
build/libstable-diffusion.a: build/libstable-diffusion.a:
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" @echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"

View File

@@ -5,7 +5,6 @@
#include <random> #include <random>
#include <string> #include <string>
#include <vector> #include <vector>
#include <filesystem>
#include "gosd.h" #include "gosd.h"
// #include "preprocessing.hpp" // #include "preprocessing.hpp"
@@ -54,43 +53,9 @@ sd_ctx_t* sd_c;
sample_method_t sample_method; sample_method_t sample_method;
// Copied from the upstream CLI int load_model(char *model, char* options[], int threads, int diff) {
void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
//SDParams* params = (SDParams*)data;
const char* level_str;
if (!log /*|| (!params->verbose && level <= SD_LOG_DEBUG)*/) {
return;
}
switch (level) {
case SD_LOG_DEBUG:
level_str = "DEBUG";
break;
case SD_LOG_INFO:
level_str = "INFO";
break;
case SD_LOG_WARN:
level_str = "WARN";
break;
case SD_LOG_ERROR:
level_str = "ERROR";
break;
default: /* Potential future-proofing */
level_str = "?????";
break;
}
fprintf(stderr, "[%-5s] ", level_str);
fputs(log, stderr);
fflush(stderr);
}
int load_model(char *model, char *model_path, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n"); fprintf (stderr, "Loading model!\n");
sd_set_log_callback(sd_log_cb, NULL);
char *stableDiffusionModel = ""; char *stableDiffusionModel = "";
if (diff == 1 ) { if (diff == 1 ) {
stableDiffusionModel = model; stableDiffusionModel = model;
@@ -104,10 +69,6 @@ int load_model(char *model, char *model_path, char* options[], int threads, int
char *vae_path = ""; char *vae_path = "";
char *scheduler = ""; char *scheduler = "";
char *sampler = ""; char *sampler = "";
char *lora_dir = model_path;
bool lora_dir_allocated = false;
fprintf(stderr, "parsing options\n");
// If options is not NULL, parse options // If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) { for (int i = 0; options[i] != NULL; i++) {
@@ -135,29 +96,12 @@ int load_model(char *model, char *model_path, char* options[], int threads, int
if (!strcmp(optname, "sampler")) { if (!strcmp(optname, "sampler")) {
sampler = optval; sampler = optval;
} }
if (!strcmp(optname, "lora_dir")) {
// Path join with model dir
if (model_path && strlen(model_path) > 0) {
std::filesystem::path model_path_str(model_path);
std::filesystem::path lora_path(optval);
std::filesystem::path full_lora_path = model_path_str / lora_path;
lora_dir = strdup(full_lora_path.string().c_str());
lora_dir_allocated = true;
fprintf(stderr, "Lora dir resolved to: %s\n", lora_dir);
} else {
lora_dir = optval;
fprintf(stderr, "No model path provided, using lora dir as-is: %s\n", lora_dir);
}
}
} }
fprintf(stderr, "parsed options\n");
int sample_method_found = -1; int sample_method_found = -1;
for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) { for (int m = 0; m < N_SAMPLE_METHODS; m++) {
if (!strcmp(sampler, sample_method_str[m])) { if (!strcmp(sampler, sample_method_str[m])) {
sample_method_found = m; sample_method_found = m;
fprintf(stderr, "Found sampler: %s\n", sampler);
} }
} }
if (sample_method_found == -1) { if (sample_method_found == -1) {
@@ -167,7 +111,7 @@ int load_model(char *model, char *model_path, char* options[], int threads, int
sample_method = (sample_method_t)sample_method_found; sample_method = (sample_method_t)sample_method_found;
int schedule_found = -1; int schedule_found = -1;
for (int d = 0; d < SCHEDULE_COUNT; d++) { for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(scheduler, schedule_str[d])) { if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d; schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler); fprintf (stderr, "Found scheduler: %s\n", scheduler);
@@ -181,50 +125,43 @@ int load_model(char *model, char *model_path, char* options[], int threads, int
} }
schedule_t schedule = (schedule_t)schedule_found; schedule_t schedule = (schedule_t)schedule_found;
fprintf (stderr, "Creating context\n"); fprintf (stderr, "Creating context\n");
sd_ctx_params_t ctx_params; sd_ctx_t* sd_ctx = new_sd_ctx(model,
sd_ctx_params_init(&ctx_params); clip_l_path,
ctx_params.model_path = model; clip_g_path,
ctx_params.clip_l_path = clip_l_path; t5xxl_path,
ctx_params.clip_g_path = clip_g_path; stableDiffusionModel,
ctx_params.t5xxl_path = t5xxl_path; vae_path,
ctx_params.diffusion_model_path = stableDiffusionModel; "",
ctx_params.vae_path = vae_path; "",
ctx_params.taesd_path = ""; "",
ctx_params.control_net_path = ""; "",
ctx_params.lora_model_dir = lora_dir; "",
ctx_params.embedding_dir = ""; false,
ctx_params.stacked_id_embed_dir = ""; false,
ctx_params.vae_decode_only = false; false,
ctx_params.vae_tiling = false; threads,
ctx_params.free_params_immediately = false; SD_TYPE_COUNT,
ctx_params.n_threads = threads; STD_DEFAULT_RNG,
ctx_params.rng_type = STD_DEFAULT_RNG; schedule,
ctx_params.schedule = schedule; false,
sd_ctx_t* sd_ctx = new_sd_ctx(&ctx_params); false,
false,
false);
if (sd_ctx == NULL) { if (sd_ctx == NULL) {
fprintf (stderr, "failed loading model (generic error)\n"); fprintf (stderr, "failed loading model (generic error)\n");
// Clean up allocated memory
if (lora_dir_allocated && lora_dir) {
free(lora_dir);
}
return 1; return 1;
} }
fprintf (stderr, "Created context: OK\n"); fprintf (stderr, "Created context: OK\n");
sd_c = sd_ctx; sd_c = sd_ctx;
// Clean up allocated memory
if (lora_dir_allocated && lora_dir) {
free(lora_dir);
}
return 0; return 0;
} }
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count) { int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
sd_image_t* results; sd_image_t* results;
@@ -232,202 +169,37 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
fprintf (stderr, "Generating image\n"); fprintf (stderr, "Generating image\n");
sd_img_gen_params_t p; results = txt2img(sd_c,
sd_img_gen_params_init(&p); text,
negativeText,
p.prompt = text; -1, //clip_skip
p.negative_prompt = negativeText; cfg_scale, // sfg_scale
p.guidance.txt_cfg = cfg_scale; 3.5f,
p.guidance.slg.layers = skip_layers.data(); 0, // eta
p.guidance.slg.layer_count = skip_layers.size(); width,
p.width = width; height,
p.height = height; sample_method,
p.sample_method = sample_method; steps,
p.sample_steps = steps; seed,
p.seed = seed; 1,
p.input_id_images_path = ""; NULL,
0.9f,
// Handle input image for img2img 20.f,
bool has_input_image = (src_image != NULL && strlen(src_image) > 0); false,
bool has_mask_image = (mask_image != NULL && strlen(mask_image) > 0); "",
skip_layers.data(),
uint8_t* input_image_buffer = NULL; skip_layers.size(),
uint8_t* mask_image_buffer = NULL; 0,
std::vector<uint8_t> default_mask_image_vec; 0.01,
0.2);
if (has_input_image) {
fprintf(stderr, "Loading input image: %s\n", src_image);
int c = 0;
int img_width = 0;
int img_height = 0;
input_image_buffer = stbi_load(src_image, &img_width, &img_height, &c, 3);
if (input_image_buffer == NULL) {
fprintf(stderr, "Failed to load input image from '%s'\n", src_image);
return 1;
}
if (c < 3) {
fprintf(stderr, "Input image must have at least 3 channels, got %d\n", c);
free(input_image_buffer);
return 1;
}
// Resize input image if dimensions don't match
if (img_width != width || img_height != height) {
fprintf(stderr, "Resizing input image from %dx%d to %dx%d\n", img_width, img_height, width, height);
uint8_t* resized_image_buffer = (uint8_t*)malloc(height * width * 3);
if (resized_image_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized image\n");
free(input_image_buffer);
return 1;
}
stbir_resize(input_image_buffer, img_width, img_height, 0,
resized_image_buffer, width, height, 0, STBIR_TYPE_UINT8,
3, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(input_image_buffer);
input_image_buffer = resized_image_buffer;
}
p.init_image = {(uint32_t)width, (uint32_t)height, 3, input_image_buffer};
p.strength = strength;
fprintf(stderr, "Using img2img with strength: %.2f\n", strength);
} else {
// No input image, use empty image for text-to-image
p.init_image = {(uint32_t)width, (uint32_t)height, 3, NULL};
p.strength = 0.0f;
}
// Handle mask image for inpainting
if (has_mask_image) {
fprintf(stderr, "Loading mask image: %s\n", mask_image);
int c = 0;
int mask_width = 0;
int mask_height = 0;
mask_image_buffer = stbi_load(mask_image, &mask_width, &mask_height, &c, 1);
if (mask_image_buffer == NULL) {
fprintf(stderr, "Failed to load mask image from '%s'\n", mask_image);
if (input_image_buffer) free(input_image_buffer);
return 1;
}
// Resize mask if dimensions don't match
if (mask_width != width || mask_height != height) {
fprintf(stderr, "Resizing mask image from %dx%d to %dx%d\n", mask_width, mask_height, width, height);
uint8_t* resized_mask_buffer = (uint8_t*)malloc(height * width);
if (resized_mask_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized mask\n");
free(mask_image_buffer);
if (input_image_buffer) free(input_image_buffer);
return 1;
}
stbir_resize(mask_image_buffer, mask_width, mask_height, 0,
resized_mask_buffer, width, height, 0, STBIR_TYPE_UINT8,
1, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(mask_image_buffer);
mask_image_buffer = resized_mask_buffer;
}
p.mask_image = {(uint32_t)width, (uint32_t)height, 1, mask_image_buffer};
fprintf(stderr, "Using inpainting with mask\n");
} else {
// No mask image, create default full mask
default_mask_image_vec.resize(width * height, 255);
p.mask_image = {(uint32_t)width, (uint32_t)height, 1, default_mask_image_vec.data()};
}
// Handle reference images
std::vector<sd_image_t> ref_images_vec;
std::vector<uint8_t*> ref_image_buffers;
if (ref_images_count > 0 && ref_images != NULL) {
fprintf(stderr, "Loading %d reference images\n", ref_images_count);
for (int i = 0; i < ref_images_count; i++) {
if (ref_images[i] == NULL || strlen(ref_images[i]) == 0) {
continue;
}
fprintf(stderr, "Loading reference image %d: %s\n", i + 1, ref_images[i]);
int c = 0;
int ref_width = 0;
int ref_height = 0;
uint8_t* ref_image_buffer = stbi_load(ref_images[i], &ref_width, &ref_height, &c, 3);
if (ref_image_buffer == NULL) {
fprintf(stderr, "Failed to load reference image from '%s'\n", ref_images[i]);
continue;
}
if (c < 3) {
fprintf(stderr, "Reference image must have at least 3 channels, got %d\n", c);
free(ref_image_buffer);
continue;
}
// Resize reference image if dimensions don't match
if (ref_width != width || ref_height != height) {
fprintf(stderr, "Resizing reference image from %dx%d to %dx%d\n", ref_width, ref_height, width, height);
uint8_t* resized_ref_buffer = (uint8_t*)malloc(height * width * 3);
if (resized_ref_buffer == NULL) {
fprintf(stderr, "Failed to allocate memory for resized reference image\n");
free(ref_image_buffer);
continue;
}
stbir_resize(ref_image_buffer, ref_width, ref_height, 0,
resized_ref_buffer, width, height, 0, STBIR_TYPE_UINT8,
3, STBIR_ALPHA_CHANNEL_NONE, 0,
STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
STBIR_FILTER_BOX, STBIR_FILTER_BOX,
STBIR_COLORSPACE_SRGB, nullptr);
free(ref_image_buffer);
ref_image_buffer = resized_ref_buffer;
}
ref_image_buffers.push_back(ref_image_buffer);
ref_images_vec.push_back({(uint32_t)width, (uint32_t)height, 3, ref_image_buffer});
}
if (!ref_images_vec.empty()) {
p.ref_images = ref_images_vec.data();
p.ref_images_count = ref_images_vec.size();
fprintf(stderr, "Using %zu reference images\n", ref_images_vec.size());
}
}
results = generate_image(sd_c, &p);
if (results == NULL) { if (results == NULL) {
fprintf (stderr, "NO results\n"); fprintf (stderr, "NO results\n");
if (input_image_buffer) free(input_image_buffer);
if (mask_image_buffer) free(mask_image_buffer);
for (auto buffer : ref_image_buffers) {
if (buffer) free(buffer);
}
return 1; return 1;
} }
if (results[0].data == NULL) { if (results[0].data == NULL) {
fprintf (stderr, "Results with no data\n"); fprintf (stderr, "Results with no data\n");
if (input_image_buffer) free(input_image_buffer);
if (mask_image_buffer) free(mask_image_buffer);
for (auto buffer : ref_image_buffers) {
if (buffer) free(buffer);
}
return 1; return 1;
} }
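Editor's note: the img2img/mask handling on the left loads each image with stb, resizes it to the generation width/height when the dimensions differ, and hands the generator a tightly packed 3-channel buffer. A rough Go equivalent of that preprocessing step, shown only as a sketch (it assumes golang.org/x/image/draw for scaling and is not the backend's actual code path):

    package main

    import (
        "image"
        _ "image/png" // register PNG decoding; add image/jpeg etc. as needed
        "os"

        xdraw "golang.org/x/image/draw"
    )

    // loadRGB decodes an image, scales it to width x height if necessary and
    // returns a tightly packed RGB buffer (3 bytes per pixel), the layout the
    // sd_image_t buffers above expect.
    func loadRGB(path string, width, height int) ([]byte, error) {
        f, err := os.Open(path)
        if err != nil {
            return nil, err
        }
        defer f.Close()
        src, _, err := image.Decode(f)
        if err != nil {
            return nil, err
        }
        dst := image.NewRGBA(image.Rect(0, 0, width, height))
        // Simple resampling comparable in spirit to STBIR_FILTER_BOX.
        xdraw.ApproxBiLinear.Scale(dst, dst.Bounds(), src, src.Bounds(), xdraw.Src, nil)
        out := make([]byte, width*height*3)
        for y := 0; y < height; y++ {
            for x := 0; x < width; x++ {
                o := dst.PixOffset(x, y)
                copy(out[(y*width+x)*3:], dst.Pix[o:o+3]) // drop the alpha channel
            }
        }
        return out, nil
    }

    func main() {
        if _, err := loadRGB(os.Args[1], 512, 512); err != nil {
            panic(err)
        }
    }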
@@ -443,15 +215,11 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
results[0].data, 0, NULL); results[0].data, 0, NULL);
fprintf (stderr, "Saved resulting image to '%s'\n", dst); fprintf (stderr, "Saved resulting image to '%s'\n", dst);
// Clean up // TODO: free results. Why does it crash?
free(results[0].data); free(results[0].data);
results[0].data = NULL; results[0].data = NULL;
free(results); free(results);
if (input_image_buffer) free(input_image_buffer);
if (mask_image_buffer) free(mask_image_buffer);
for (auto buffer : ref_image_buffers) {
if (buffer) free(buffer);
}
fprintf (stderr, "gen_image is done", dst); fprintf (stderr, "gen_image is done", dst);
return 0; return 0;

View File

@@ -29,21 +29,16 @@ func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads) sd.threads = int(opts.Threads)
modelPath := opts.ModelPath
modelFile := C.CString(opts.ModelFile) modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile)) defer C.free(unsafe.Pointer(modelFile))
modelPathC := C.CString(modelPath)
defer C.free(unsafe.Pointer(modelPathC))
var options **C.char var options **C.char
// prepare the options array to pass to C // prepare the options array to pass to C
size := C.size_t(unsafe.Sizeof((*C.char)(nil))) size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
length := C.size_t(len(opts.Options)) length := C.size_t(len(opts.Options))
options = (**C.char)(C.malloc((length + 1) * size)) options = (**C.char)(C.malloc(length * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0 : len(opts.Options)+1 : len(opts.Options)+1] view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
var diffusionModel int var diffusionModel int
@@ -71,11 +66,10 @@ func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
for i, x := range oo { for i, x := range oo {
view[i] = C.CString(x) view[i] = C.CString(x)
} }
view[len(oo)] = nil
sd.cfgScale = opts.CFGScale sd.cfgScale = opts.CFGScale
ret := C.load_model(modelFile, modelPathC, options, C.int(opts.Threads), C.int(diffusionModel)) ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 { if ret != 0 {
return fmt.Errorf("could not load model") return fmt.Errorf("could not load model")
} }
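Editor's note: the two columns differ in how the options array is sized. The variant on the left allocates len+1 pointers and stores a trailing nil, so the C loop `for (int i = 0; options[i] != NULL; i++)` in gosd.cpp has a sentinel to stop at. A minimal standalone cgo sketch of that NULL-terminated pattern (illustrative only, not the backend file itself; the option strings are made up):

    package main

    /*
    #include <stdlib.h>
    */
    import "C"

    import (
        "fmt"
        "unsafe"
    )

    // cStringArray builds a NULL-terminated char** from a Go slice; the extra
    // slot holds the nil sentinel the C side iterates up to.
    func cStringArray(ss []string) (**C.char, func()) {
        ptrSize := C.size_t(unsafe.Sizeof((*C.char)(nil)))
        arr := (**C.char)(C.malloc((C.size_t(len(ss)) + 1) * ptrSize))
        view := (*[1 << 30]*C.char)(unsafe.Pointer(arr))[: len(ss)+1 : len(ss)+1]
        for i, s := range ss {
            view[i] = C.CString(s)
        }
        view[len(ss)] = nil // terminator
        return arr, func() {
            for i := range ss {
                C.free(unsafe.Pointer(view[i]))
            }
            C.free(unsafe.Pointer(arr))
        }
    }

    func main() {
        opts, free := cStringArray([]string{"sampler:euler_a"}) // illustrative option string
        defer free()
        fmt.Printf("options array at %p\n", opts)
    }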
@@ -93,56 +87,7 @@ func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
negative := C.CString(opts.NegativePrompt) negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative)) defer C.free(unsafe.Pointer(negative))
// Handle source image path ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
var srcImage *C.char
if opts.Src != "" {
srcImage = C.CString(opts.Src)
defer C.free(unsafe.Pointer(srcImage))
}
// Handle mask image path
var maskImage *C.char
if opts.EnableParameters != "" {
// Parse EnableParameters for mask path if provided
// This is a simple approach - in a real implementation you might want to parse JSON
if strings.Contains(opts.EnableParameters, "mask:") {
parts := strings.Split(opts.EnableParameters, "mask:")
if len(parts) > 1 {
maskPath := strings.TrimSpace(parts[1])
if maskPath != "" {
maskImage = C.CString(maskPath)
defer C.free(unsafe.Pointer(maskImage))
}
}
}
}
// Handle reference images
var refImages **C.char
var refImagesCount C.int
if len(opts.RefImages) > 0 {
refImagesCount = C.int(len(opts.RefImages))
// Allocate array of C strings
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
refImages = (**C.char)(C.malloc((C.size_t(len(opts.RefImages)) + 1) * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(refImages))[0 : len(opts.RefImages)+1 : len(opts.RefImages)+1]
for i, refImagePath := range opts.RefImages {
view[i] = C.CString(refImagePath)
defer C.free(unsafe.Pointer(view[i]))
}
view[len(opts.RefImages)] = nil
}
// Default strength for img2img (0.75 is a good default)
strength := C.float(0.75)
if opts.Src != "" {
// If we have a source image, use img2img mode
// You could also parse strength from EnableParameters if needed
strength = C.float(0.75)
}
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale), srcImage, strength, maskImage, refImages, refImagesCount)
if ret != 0 { if ret != 0 {
return fmt.Errorf("inference failed") return fmt.Errorf("inference failed")
} }
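Editor's note: on the left, src_image and mask_image are optional. The Go side only allocates a C string when the option is set and otherwise passes NULL, which is what the `src_image != NULL && strlen(src_image) > 0` checks in gosd.cpp test for. A small helper in the same spirit, as a standalone sketch (the toy has_value function stands in for gen_image's optional parameters):

    package main

    /*
    #include <stdlib.h>
    #include <string.h>

    // toy consumer standing in for gen_image's optional parameters
    static int has_value(const char *s) { return s != NULL && strlen(s) > 0; }
    */
    import "C"

    import (
        "fmt"
        "unsafe"
    )

    // optionalCString returns a C string for non-empty input and nil otherwise,
    // plus a cleanup func that is safe to defer in both cases.
    func optionalCString(s string) (*C.char, func()) {
        if s == "" {
            return nil, func() {}
        }
        cs := C.CString(s)
        return cs, func() { C.free(unsafe.Pointer(cs)) }
    }

    func main() {
        src, freeSrc := optionalCString("/tmp/input.png")
        defer freeSrc()
        mask, freeMask := optionalCString("") // unset -> NULL on the C side
        defer freeMask()
        fmt.Println(C.has_value(src), C.has_value(mask)) // prints: 1 0
    }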

View File

@@ -1,8 +1,8 @@
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int load_model(char *model, char *model_path, char* options[], int threads, int diffusionModel); int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale, char *src_image, float strength, char *mask_image, char **ref_images, int ref_images_count); int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -6,7 +6,7 @@ CMAKE_ARGS?=
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=f7502dca872866a310fe69d30b163fa87d256319 WHISPER_CPP_VERSION?=1f5cf0b2888402d57bb17b2029b2caa97e5f3baf
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp) export WHISPER_DIR=$(abspath ./sources/whisper.cpp)

View File

@@ -73,28 +73,6 @@
nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml" nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
# metal: "metal-stablediffusion-ggml" # metal: "metal-stablediffusion-ggml"
# darwin-x86: "darwin-x86-stablediffusion-ggml" # darwin-x86: "darwin-x86-stablediffusion-ggml"
- &rfdetr
name: "rfdetr"
alias: "rfdetr"
license: apache-2.0
icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
description: |
RF-DETR is a real-time, transformer-based object detection model architecture developed by Roboflow and released under the Apache 2.0 license.
RF-DETR is the first real-time model to exceed 60 AP on the Microsoft COCO benchmark alongside competitive performance at base sizes. It also achieves state-of-the-art performance on RF100-VL, an object detection benchmark that measures model domain adaptability to real-world problems. RF-DETR is the fastest and most accurate for its size when compared to current real-time object detection models.
RF-DETR is small enough to run on the edge using Inference, making it an ideal model for deployments that need both strong accuracy and real-time performance.
urls:
- https://github.com/roboflow/rf-detr
tags:
- object-detection
- rfdetr
- gpu
- cpu
capabilities:
nvidia: "cuda12-rfdetr"
intel: "intel-rfdetr"
#amd: "rocm-rfdetr"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
default: "cpu-rfdetr"
- &vllm - &vllm
name: "vllm" name: "vllm"
license: apache-2.0 license: apache-2.0
@@ -126,13 +104,13 @@
capabilities: capabilities:
nvidia: "cuda12-vllm" nvidia: "cuda12-vllm"
amd: "rocm-vllm" amd: "rocm-vllm"
intel: "intel-vllm" intel: "intel-sycl-f16-vllm"
- &rerankers - &rerankers
name: "rerankers" name: "rerankers"
alias: "rerankers" alias: "rerankers"
capabilities: capabilities:
nvidia: "cuda12-rerankers" nvidia: "cuda12-rerankers"
intel: "intel-rerankers" intel: "intel-sycl-f16-rerankers"
amd: "rocm-rerankers" amd: "rocm-rerankers"
- &transformers - &transformers
name: "transformers" name: "transformers"
@@ -149,7 +127,7 @@
- multimodal - multimodal
capabilities: capabilities:
nvidia: "cuda12-transformers" nvidia: "cuda12-transformers"
intel: "intel-transformers" intel: "intel-sycl-f16-transformers"
amd: "rocm-transformers" amd: "rocm-transformers"
- &diffusers - &diffusers
name: "diffusers" name: "diffusers"
@@ -166,7 +144,7 @@
alias: "diffusers" alias: "diffusers"
capabilities: capabilities:
nvidia: "cuda12-diffusers" nvidia: "cuda12-diffusers"
intel: "intel-diffusers" intel: "intel-sycl-f32-diffusers"
amd: "rocm-diffusers" amd: "rocm-diffusers"
- &exllama2 - &exllama2
name: "exllama2" name: "exllama2"
@@ -182,7 +160,8 @@
alias: "exllama2" alias: "exllama2"
capabilities: capabilities:
nvidia: "cuda12-exllama2" nvidia: "cuda12-exllama2"
intel: "intel-exllama2" intel: "intel-sycl-f32-exllama2"
amd: "rocm-exllama2"
- &faster-whisper - &faster-whisper
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4 icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
description: | description: |
@@ -197,7 +176,7 @@
name: "faster-whisper" name: "faster-whisper"
capabilities: capabilities:
nvidia: "cuda12-faster-whisper" nvidia: "cuda12-faster-whisper"
intel: "intel-faster-whisper" intel: "intel-sycl-f32-faster-whisper"
amd: "rocm-faster-whisper" amd: "rocm-faster-whisper"
- &kokoro - &kokoro
icon: https://avatars.githubusercontent.com/u/166769057?v=4 icon: https://avatars.githubusercontent.com/u/166769057?v=4
@@ -215,7 +194,7 @@
name: "kokoro" name: "kokoro"
capabilities: capabilities:
nvidia: "cuda12-kokoro" nvidia: "cuda12-kokoro"
intel: "intel-kokoro" intel: "intel-sycl-f32-kokoro"
amd: "rocm-kokoro" amd: "rocm-kokoro"
- &coqui - &coqui
urls: urls:
@@ -236,7 +215,7 @@
alias: "coqui" alias: "coqui"
capabilities: capabilities:
nvidia: "cuda12-coqui" nvidia: "cuda12-coqui"
intel: "intel-coqui" intel: "intel-sycl-f32-coqui"
amd: "rocm-coqui" amd: "rocm-coqui"
icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4 icon: https://avatars.githubusercontent.com/u/1338804?s=200&v=4
- &bark - &bark
@@ -252,7 +231,7 @@
alias: "bark" alias: "bark"
capabilities: capabilities:
cuda: "cuda12-bark" cuda: "cuda12-bark"
intel: "intel-bark" intel: "intel-sycl-f32-bark"
rocm: "rocm-bark" rocm: "rocm-bark"
icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4 icon: https://avatars.githubusercontent.com/u/99442120?s=200&v=4
- &barkcpp - &barkcpp
@@ -279,8 +258,6 @@
icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png icon: https://github.com/PABannier/bark.cpp/raw/main/assets/banner.png
name: "bark-cpp" name: "bark-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-bark-cpp"
mirrors:
- localai/localai-backends:latest-bark-cpp
alias: "bark-cpp" alias: "bark-cpp"
- &chatterbox - &chatterbox
urls: urls:
@@ -303,8 +280,6 @@
urls: urls:
- https://github.com/rhasspy/piper - https://github.com/rhasspy/piper
- https://github.com/mudler/go-piper - https://github.com/mudler/go-piper
mirrors:
- localai/localai-backends:latest-piper
license: MIT license: MIT
description: | description: |
A fast, local neural text to speech system A fast, local neural text to speech system
@@ -317,8 +292,6 @@
icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png icon: https://user-images.githubusercontent.com/12515440/89997349-b3523080-dc94-11ea-9906-ca2e8bc50535.png
urls: urls:
- https://github.com/snakers4/silero-vad - https://github.com/snakers4/silero-vad
mirrors:
- localai/localai-backends:latest-cpu-silero-vad
description: | description: |
Silero VAD: pre-trained enterprise-grade Voice Activity Detector. Silero VAD: pre-trained enterprise-grade Voice Activity Detector.
Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not. Silero VAD is a voice activity detection model that can be used to detect whether a given audio contains speech or not.
@@ -330,8 +303,6 @@
- &local-store - &local-store
name: "local-store" name: "local-store"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-local-store"
mirrors:
- localai/localai-backends:latest-cpu-local-store
urls: urls:
- https://github.com/mudler/LocalAI - https://github.com/mudler/LocalAI
description: | description: |
@@ -345,8 +316,6 @@
- &huggingface - &huggingface
name: "huggingface" name: "huggingface"
uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface" uri: "quay.io/go-skynet/local-ai-backends:latest-huggingface"
mirrors:
- localai/localai-backends:latest-huggingface
icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
urls: urls:
- https://huggingface.co/docs/hub/en/api - https://huggingface.co/docs/hub/en/api
@@ -359,721 +328,469 @@
- !!merge <<: *huggingface - !!merge <<: *huggingface
name: "huggingface-development" name: "huggingface-development"
uri: "quay.io/go-skynet/local-ai-backends:master-huggingface" uri: "quay.io/go-skynet/local-ai-backends:master-huggingface"
mirrors:
- localai/localai-backends:master-huggingface
- !!merge <<: *local-store - !!merge <<: *local-store
name: "local-store-development" name: "local-store-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-local-store"
mirrors:
- localai/localai-backends:master-cpu-local-store
- !!merge <<: *silero-vad - !!merge <<: *silero-vad
name: "silero-vad-development" name: "silero-vad-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-silero-vad"
mirrors:
- localai/localai-backends:master-cpu-silero-vad
- !!merge <<: *piper - !!merge <<: *piper
name: "piper-development" name: "piper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-piper" uri: "quay.io/go-skynet/local-ai-backends:master-piper"
mirrors:
- localai/localai-backends:master-piper
## llama-cpp ## llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "darwin-x86-llama-cpp" name: "darwin-x86-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-darwin-x86-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-darwin-x86-llama-cpp"
mirrors:
- localai/localai-backends:latest-darwin-x86-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "darwin-x86-llama-cpp-development" name: "darwin-x86-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-darwin-x86-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-darwin-x86-llama-cpp"
mirrors:
- localai/localai-backends:master-darwin-x86-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp" name: "nvidia-l4t-arm64-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "nvidia-l4t-arm64-llama-cpp-development" name: "nvidia-l4t-arm64-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-llama-cpp"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cpu-llama-cpp" name: "cpu-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp"
mirrors:
- localai/localai-backends:latest-cpu-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cpu-llama-cpp-development" name: "cpu-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp"
mirrors:
- localai/localai-backends:master-cpu-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cuda11-llama-cpp" name: "cuda11-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cuda12-llama-cpp" name: "cuda12-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "rocm-llama-cpp" name: "rocm-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "intel-sycl-f32-llama-cpp" name: "intel-sycl-f32-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "intel-sycl-f16-llama-cpp" name: "intel-sycl-f16-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "vulkan-llama-cpp" name: "vulkan-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "vulkan-llama-cpp-development" name: "vulkan-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-vulkan-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "metal-llama-cpp" name: "metal-llama-cpp"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "metal-llama-cpp-development" name: "metal-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cuda11-llama-cpp-development" name: "cuda11-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "cuda12-llama-cpp-development" name: "cuda12-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "rocm-llama-cpp-development" name: "rocm-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "intel-sycl-f32-llama-cpp-development" name: "intel-sycl-f32-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-llama-cpp
- !!merge <<: *llamacpp - !!merge <<: *llamacpp
name: "intel-sycl-f16-llama-cpp-development" name: "intel-sycl-f16-llama-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-llama-cpp"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-llama-cpp
## whisper ## whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "nvidia-l4t-arm64-whisper" name: "nvidia-l4t-arm64-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-whisper"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "nvidia-l4t-arm64-whisper-development" name: "nvidia-l4t-arm64-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-whisper"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cpu-whisper" name: "cpu-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisper"
mirrors:
- localai/localai-backends:latest-cpu-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cpu-whisper-development" name: "cpu-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper"
mirrors:
- localai/localai-backends:master-cpu-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cuda11-whisper" name: "cuda11-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cuda12-whisper" name: "cuda12-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "rocm-whisper" name: "rocm-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisper"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "intel-sycl-f32-whisper" name: "intel-sycl-f32-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-whisper"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f32-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "intel-sycl-f16-whisper" name: "intel-sycl-f16-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-whisper"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "vulkan-whisper" name: "vulkan-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-whisper"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "vulkan-whisper-development" name: "vulkan-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-whisper"
mirrors:
- localai/localai-backends:master-gpu-vulkan-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "metal-whisper" name: "metal-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-whisper"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "metal-whisper-development" name: "metal-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cuda11-whisper-development" name: "cuda11-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "cuda12-whisper-development" name: "cuda12-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "rocm-whisper-development" name: "rocm-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisper"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "intel-sycl-f32-whisper-development" name: "intel-sycl-f32-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-whisper"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-whisper
- !!merge <<: *whispercpp - !!merge <<: *whispercpp
name: "intel-sycl-f16-whisper-development" name: "intel-sycl-f16-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-whisper"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-whisper
## stablediffusion-ggml ## stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cpu-stablediffusion-ggml" name: "cpu-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-cpu-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cpu-stablediffusion-ggml-development" name: "cpu-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-cpu-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "vulkan-stablediffusion-ggml" name: "vulkan-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-gpu-vulkan-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "vulkan-stablediffusion-ggml-development" name: "vulkan-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-gpu-vulkan-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cuda12-stablediffusion-ggml" name: "cuda12-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "intel-sycl-f32-stablediffusion-ggml" name: "intel-sycl-f32-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-stablediffusion-ggml"
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "intel-sycl-f16-stablediffusion-ggml" name: "intel-sycl-f16-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cuda11-stablediffusion-ggml" name: "cuda11-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cuda12-stablediffusion-ggml-development" name: "cuda12-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "intel-sycl-f32-stablediffusion-ggml-development" name: "intel-sycl-f32-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f32-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "intel-sycl-f16-stablediffusion-ggml-development" name: "intel-sycl-f16-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "cuda11-stablediffusion-ggml-development" name: "cuda11-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "nvidia-l4t-arm64-stablediffusion-ggml-development" name: "nvidia-l4t-arm64-stablediffusion-ggml-development"
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml"
mirrors:
- localai/localai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml
- !!merge <<: *stablediffusionggml - !!merge <<: *stablediffusionggml
name: "nvidia-l4t-arm64-stablediffusion-ggml" name: "nvidia-l4t-arm64-stablediffusion-ggml"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml" uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-stablediffusion-ggml
# vllm # vllm
- !!merge <<: *vllm - !!merge <<: *vllm
name: "vllm-development" name: "vllm-development"
capabilities: capabilities:
nvidia: "cuda12-vllm-development" nvidia: "cuda12-vllm-development"
amd: "rocm-vllm-development" amd: "rocm-vllm-development"
intel: "intel-vllm-development" intel: "intel-sycl-f16-vllm-development"
- !!merge <<: *vllm - !!merge <<: *vllm
name: "cuda12-vllm" name: "cuda12-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-vllm
- !!merge <<: *vllm - !!merge <<: *vllm
name: "rocm-vllm" name: "rocm-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-vllm
- !!merge <<: *vllm - !!merge <<: *vllm
name: "intel-vllm" name: "intel-sycl-f32-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-vllm" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm"
mirrors: - !!merge <<: *vllm
- localai/localai-backends:latest-gpu-intel-vllm name: "intel-sycl-f16-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm"
- !!merge <<: *vllm - !!merge <<: *vllm
name: "cuda12-vllm-development" name: "cuda12-vllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-vllm
- !!merge <<: *vllm - !!merge <<: *vllm
name: "rocm-vllm-development" name: "rocm-vllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-vllm
- !!merge <<: *vllm - !!merge <<: *vllm
name: "intel-vllm-development" name: "intel-sycl-f32-vllm-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-vllm" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm"
mirrors: - !!merge <<: *vllm
- localai/localai-backends:master-gpu-intel-vllm name: "intel-sycl-f16-vllm-development"
# rfdetr uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
- !!merge <<: *rfdetr
name: "rfdetr-development"
capabilities:
nvidia: "cuda12-rfdetr-development"
intel: "intel-rfdetr-development"
#amd: "rocm-rfdetr-development"
nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
default: "cpu-rfdetr-development"
- !!merge <<: *rfdetr
name: "cuda12-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-rfdetr
- !!merge <<: *rfdetr
name: "intel-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr"
mirrors:
- localai/localai-backends:latest-gpu-intel-rfdetr
# - !!merge <<: *rfdetr
# name: "rocm-rfdetr"
# uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-hipblas-rfdetr"
# mirrors:
# - localai/localai-backends:latest-gpu-hipblas-rfdetr
- !!merge <<: *rfdetr
name: "nvidia-l4t-arm64-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr"
mirrors:
- localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr
- !!merge <<: *rfdetr
name: "cpu-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr"
mirrors:
- localai/localai-backends:latest-cpu-rfdetr
- !!merge <<: *rfdetr
name: "cuda12-rfdetr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rfdetr"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-rfdetr
- !!merge <<: *rfdetr
name: "intel-rfdetr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rfdetr"
mirrors:
- localai/localai-backends:master-gpu-intel-rfdetr
# - !!merge <<: *rfdetr
# name: "rocm-rfdetr-development"
# uri: "quay.io/go-skynet/local-ai-backends:master-gpu-hipblas-rfdetr"
# mirrors:
# - localai/localai-backends:master-gpu-hipblas-rfdetr
- !!merge <<: *rfdetr
name: "cpu-rfdetr-development"
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-rfdetr"
mirrors:
- localai/localai-backends:master-cpu-rfdetr
- !!merge <<: *rfdetr
name: "intel-rfdetr"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr"
mirrors:
- localai/localai-backends:latest-gpu-intel-rfdetr
## Rerankers ## Rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "rerankers-development" name: "rerankers-development"
capabilities: capabilities:
nvidia: "cuda12-rerankers-development" nvidia: "cuda12-rerankers-development"
intel: "intel-rerankers-development" intel: "intel-sycl-f16-rerankers-development"
amd: "rocm-rerankers-development" amd: "rocm-rerankers-development"
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "cuda11-rerankers" name: "cuda11-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "cuda12-rerankers" name: "cuda12-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "intel-rerankers" name: "intel-sycl-f32-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers"
mirrors: - !!merge <<: *rerankers
- localai/localai-backends:latest-gpu-intel-rerankers name: "intel-sycl-f16-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers"
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "rocm-rerankers" name: "rocm-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "cuda11-rerankers-development" name: "cuda11-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "cuda12-rerankers-development" name: "cuda12-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "rocm-rerankers-development" name: "rocm-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-rerankers
- !!merge <<: *rerankers - !!merge <<: *rerankers
name: "intel-rerankers-development" name: "intel-sycl-f32-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rerankers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers"
mirrors: - !!merge <<: *rerankers
- localai/localai-backends:master-gpu-intel-rerankers name: "intel-sycl-f16-rerankers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
## Transformers ## Transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "transformers-development" name: "transformers-development"
capabilities: capabilities:
nvidia: "cuda12-transformers-development" nvidia: "cuda12-transformers-development"
intel: "intel-transformers-development" intel: "intel-sycl-f16-transformers-development"
amd: "rocm-transformers-development" amd: "rocm-transformers-development"
- !!merge <<: *transformers - !!merge <<: *transformers
name: "cuda12-transformers" name: "cuda12-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "rocm-transformers" name: "rocm-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "intel-transformers" name: "intel-sycl-f32-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-transformers"
mirrors: - !!merge <<: *transformers
- localai/localai-backends:latest-gpu-intel-transformers name: "intel-sycl-f16-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-transformers"
- !!merge <<: *transformers - !!merge <<: *transformers
name: "cuda11-transformers-development" name: "cuda11-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "cuda11-transformers" name: "cuda11-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "cuda12-transformers-development" name: "cuda12-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "rocm-transformers-development" name: "rocm-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-transformers
- !!merge <<: *transformers - !!merge <<: *transformers
name: "intel-transformers-development" name: "intel-sycl-f32-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-transformers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-transformers"
mirrors: - !!merge <<: *transformers
- localai/localai-backends:master-gpu-intel-transformers name: "intel-sycl-f16-transformers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers"
## Diffusers ## Diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "diffusers-development" name: "diffusers-development"
capabilities: capabilities:
nvidia: "cuda12-diffusers-development" nvidia: "cuda12-diffusers-development"
intel: "intel-diffusers-development" intel: "intel-sycl-f32-diffusers-development"
amd: "rocm-diffusers-development" amd: "rocm-diffusers-development"
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "cuda12-diffusers" name: "cuda12-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "rocm-diffusers" name: "rocm-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "cuda11-diffusers" name: "cuda11-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "intel-diffusers" name: "intel-sycl-f32-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers"
mirrors:
- localai/localai-backends:latest-gpu-intel-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "cuda11-diffusers-development" name: "cuda11-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "cuda12-diffusers-development" name: "cuda12-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "rocm-diffusers-development" name: "rocm-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-diffusers
- !!merge <<: *diffusers - !!merge <<: *diffusers
name: "intel-diffusers-development" name: "intel-sycl-f32-diffusers-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-diffusers" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
mirrors:
- localai/localai-backends:master-gpu-intel-diffusers
## exllama2 ## exllama2
- !!merge <<: *exllama2 - !!merge <<: *exllama2
name: "exllama2-development" name: "exllama2-development"
capabilities: capabilities:
nvidia: "cuda12-exllama2-development" nvidia: "cuda12-exllama2-development"
intel: "intel-exllama2-development" intel: "intel-sycl-f32-exllama2-development"
amd: "rocm-exllama2-development"
- !!merge <<: *exllama2 - !!merge <<: *exllama2
name: "cuda11-exllama2" name: "cuda11-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-exllama2
- !!merge <<: *exllama2 - !!merge <<: *exllama2
name: "cuda12-exllama2" name: "cuda12-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-exllama2
- !!merge <<: *exllama2 - !!merge <<: *exllama2
name: "cuda11-exllama2-development" name: "cuda11-exllama2-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-exllama2
- !!merge <<: *exllama2 - !!merge <<: *exllama2
name: "cuda12-exllama2-development" name: "cuda12-exllama2-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-exllama2
## kokoro ## kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "kokoro-development" name: "kokoro-development"
capabilities: capabilities:
nvidia: "cuda12-kokoro-development" nvidia: "cuda12-kokoro-development"
intel: "intel-kokoro-development" intel: "intel-sycl-f32-kokoro-development"
amd: "rocm-kokoro-development" amd: "rocm-kokoro-development"
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "cuda11-kokoro-development" name: "cuda11-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "cuda12-kokoro-development" name: "cuda12-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "rocm-kokoro-development" name: "rocm-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-kokoro" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-kokoro"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "intel-kokoro" name: "sycl-f32-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-kokoro"
mirrors:
- localai/localai-backends:latest-gpu-intel-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "intel-kokoro-development" name: "sycl-f16-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-kokoro"
mirrors: - !!merge <<: *kokoro
- localai/localai-backends:master-gpu-intel-kokoro name: "sycl-f16-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-kokoro"
- !!merge <<: *kokoro
name: "sycl-f32-kokoro-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro"
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "cuda11-kokoro" name: "cuda11-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "cuda12-kokoro" name: "cuda12-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-kokoro
- !!merge <<: *kokoro - !!merge <<: *kokoro
name: "rocm-kokoro" name: "rocm-kokoro"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-kokoro"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-kokoro
## faster-whisper ## faster-whisper
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "faster-whisper-development" name: "faster-whisper-development"
capabilities: capabilities:
nvidia: "cuda12-faster-whisper-development" nvidia: "cuda12-faster-whisper-development"
intel: "intel-faster-whisper-development" intel: "intel-sycl-f32-faster-whisper-development"
amd: "rocm-faster-whisper-development" amd: "rocm-faster-whisper-development"
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "cuda11-faster-whisper" name: "cuda11-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-faster-whisper
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "cuda12-faster-whisper-development" name: "cuda12-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-faster-whisper
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "rocm-faster-whisper-development" name: "rocm-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-faster-whisper"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-faster-whisper
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "intel-faster-whisper" name: "sycl-f32-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-faster-whisper"
mirrors:
- localai/localai-backends:latest-gpu-intel-faster-whisper
- !!merge <<: *faster-whisper - !!merge <<: *faster-whisper
name: "intel-faster-whisper-development" name: "sycl-f16-faster-whisper"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-faster-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-faster-whisper"
mirrors: - !!merge <<: *faster-whisper
- localai/localai-backends:master-gpu-intel-faster-whisper name: "sycl-f32-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-faster-whisper"
- !!merge <<: *faster-whisper
name: "sycl-f16-faster-whisper-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper"
## coqui ## coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "coqui-development" name: "coqui-development"
capabilities: capabilities:
nvidia: "cuda12-coqui-development" nvidia: "cuda12-coqui-development"
intel: "intel-coqui-development" intel: "intel-sycl-f32-coqui-development"
amd: "rocm-coqui-development" amd: "rocm-coqui-development"
- !!merge <<: *coqui - !!merge <<: *coqui
name: "cuda11-coqui" name: "cuda11-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "cuda12-coqui" name: "cuda12-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "cuda11-coqui-development" name: "cuda11-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "cuda12-coqui-development" name: "cuda12-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "rocm-coqui-development" name: "rocm-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-coqui" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-coqui"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "intel-coqui" name: "sycl-f32-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-coqui"
mirrors:
- localai/localai-backends:latest-gpu-intel-coqui
- !!merge <<: *coqui - !!merge <<: *coqui
name: "intel-coqui-development" name: "sycl-f16-coqui"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-coqui"
mirrors: - !!merge <<: *coqui
- localai/localai-backends:master-gpu-intel-coqui name: "sycl-f32-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-coqui"
- !!merge <<: *coqui
name: "sycl-f16-coqui-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui"
- !!merge <<: *coqui - !!merge <<: *coqui
name: "rocm-coqui" name: "rocm-coqui"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-coqui"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-coqui
## bark ## bark
- !!merge <<: *bark - !!merge <<: *bark
name: "bark-development" name: "bark-development"
capabilities: capabilities:
nvidia: "cuda12-bark-development" nvidia: "cuda12-bark-development"
intel: "intel-bark-development" intel: "intel-sycl-f32-bark-development"
amd: "rocm-bark-development" amd: "rocm-bark-development"
- !!merge <<: *bark - !!merge <<: *bark
name: "cuda11-bark-development" name: "cuda11-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "cuda11-bark" name: "cuda11-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "rocm-bark-development" name: "rocm-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark"
mirrors:
- localai/localai-backends:master-gpu-rocm-hipblas-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "intel-bark" name: "sycl-f32-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-bark" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-bark"
mirrors:
- localai/localai-backends:latest-gpu-intel-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "intel-bark-development" name: "sycl-f16-bark"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-bark" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-bark"
mirrors: - !!merge <<: *bark
- localai/localai-backends:master-gpu-intel-bark name: "sycl-f32-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-bark"
- !!merge <<: *bark
name: "sycl-f16-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark"
- !!merge <<: *bark - !!merge <<: *bark
name: "cuda12-bark" name: "cuda12-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-bark" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-bark"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "rocm-bark" name: "rocm-bark"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-bark" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-bark"
mirrors:
- localai/localai-backends:latest-gpu-rocm-hipblas-bark
- !!merge <<: *bark - !!merge <<: *bark
name: "cuda12-bark-development" name: "cuda12-bark-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-bark"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-bark
- !!merge <<: *barkcpp - !!merge <<: *barkcpp
name: "bark-cpp-development" name: "bark-cpp-development"
uri: "quay.io/go-skynet/local-ai-backends:master-bark-cpp" uri: "quay.io/go-skynet/local-ai-backends:master-bark-cpp"
@@ -1086,20 +803,12 @@
- !!merge <<: *chatterbox - !!merge <<: *chatterbox
name: "cuda12-chatterbox-development" name: "cuda12-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-12-chatterbox
- !!merge <<: *chatterbox - !!merge <<: *chatterbox
name: "cuda11-chatterbox" name: "cuda11-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-11-chatterbox
- !!merge <<: *chatterbox - !!merge <<: *chatterbox
name: "cuda11-chatterbox-development" name: "cuda11-chatterbox-development"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-11-chatterbox
- !!merge <<: *chatterbox - !!merge <<: *chatterbox
name: "cuda12-chatterbox" name: "cuda12-chatterbox"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox"
mirrors:
- localai/localai-backends:latest-gpu-nvidia-cuda-12-chatterbox
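
The index entries above follow a fixed image-tag scheme: stable entries point at latest-* tags, *-development entries at master-*, and the Intel SYCL images are split into sycl-f16 and sycl-f32 variants. A minimal Go sketch of how those references are composed; the helper below is illustrative only, not part of LocalAI:

package main

import "fmt"

// buildImageRef shows how the gallery tags above are assembled:
// <channel>-gpu-<accelerator>-<backend>, where channel is "latest" for stable
// entries and "master" for the *-development entries.
func buildImageRef(channel, accelerator, backend string) string {
	return fmt.Sprintf("quay.io/go-skynet/local-ai-backends:%s-gpu-%s-%s", channel, accelerator, backend)
}

func main() {
	// Reproduces the "intel-sycl-f16-transformers" entry above.
	fmt.Println(buildImageRef("latest", "intel-sycl-f16", "transformers"))
	// Reproduces the "sycl-f32-kokoro-development" entry above.
	fmt.Println(buildImageRef("master", "intel-sycl-f32", "kokoro"))
}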

View File

@@ -111,7 +111,7 @@ function ensureVenv() {
# - requirements-${BUILD_TYPE}.txt # - requirements-${BUILD_TYPE}.txt
# - requirements-${BUILD_PROFILE}.txt # - requirements-${BUILD_PROFILE}.txt
# #
# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12 # BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda11 or cuda12
# it can also include some options that we do not have BUILD_TYPES for, ex: intel # it can also include some options that we do not have BUILD_TYPES for, ex: intel
# #
# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index. # NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
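
The comment block above describes how a backend's Python dependencies are layered: a base requirements.txt, then requirements-${BUILD_TYPE}.txt, then the more specific requirements-${BUILD_PROFILE}.txt. A minimal Go sketch of that selection order; the profile values in main are illustrative, and the real logic lives in the shell function in libbackend.sh:

package main

import "fmt"

// requirementsOrder sketches the documented layering: base file first, then the
// BUILD_TYPE-specific file, then the more specific BUILD_PROFILE file.
func requirementsOrder(buildType, buildProfile string) []string {
	files := []string{"requirements.txt"}
	if buildType != "" {
		files = append(files, "requirements-"+buildType+".txt")
	}
	if buildProfile != "" && buildProfile != buildType {
		files = append(files, "requirements-"+buildProfile+".txt")
	}
	return files
}

func main() {
	// Illustrative values; the comment above names cuda11/cuda12 and intel as profiles.
	fmt.Println(requirementsOrder("cublas", "cuda12"))
	fmt.Println(requirementsOrder("intel", "intel"))
}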

View File

@@ -8,6 +8,4 @@ else
source $backend_dir/../common/libbackend.sh source $backend_dir/../common/libbackend.sh
fi fi
ensureVenv
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,20 +0,0 @@
.DEFAULT_GOAL := install
.PHONY: install
install:
	bash install.sh
	$(MAKE) protogen
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh
.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__

View File

@@ -1,174 +0,0 @@
#!/usr/bin/env python3
"""
gRPC server for RFDETR object detection models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import time
import base64
import backend_pb2
import backend_pb2_grpc
import grpc
import requests
import supervision as sv
from inference import get_model
from PIL import Image
from io import BytesIO
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
A gRPC servicer for the RFDETR backend service.
This class implements the gRPC methods for object detection using RFDETR models.
"""
def __init__(self):
self.model = None
self.model_name = None
def Health(self, request, context):
"""
A gRPC method that returns the health status of the backend service.
Args:
request: A HealthMessage object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Reply object that contains the health status of the backend service.
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
"""
A gRPC method that loads a RFDETR model into memory.
Args:
request: A ModelOptions object that contains the model parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Result object that contains the result of the LoadModel operation.
"""
model_name = request.Model
try:
# Load the RFDETR model
self.model = get_model(model_name)
self.model_name = model_name
print(f'Loaded RFDETR model: {model_name}')
except Exception as err:
return backend_pb2.Result(success=False, message=f"Failed to load model: {err}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Detect(self, request, context):
"""
A gRPC method that performs object detection on an image.
Args:
request: A DetectOptions object that contains the image source.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A DetectResponse object that contains the detection results.
"""
if self.model is None:
print(f"Model is None")
return backend_pb2.DetectResponse()
print(f"Model is not None")
try:
print(f"Decoding image")
# Decode the base64 image
print(f"Image data: {request.src}")
image_data = base64.b64decode(request.src)
image = Image.open(BytesIO(image_data))
# Perform inference
predictions = self.model.infer(image, confidence=0.5)[0]
# Convert to proto format
proto_detections = []
for i in range(len(predictions.predictions)):
pred = predictions.predictions[i]
print(f"Prediction: {pred}")
proto_detection = backend_pb2.Detection(
x=float(pred.x),
y=float(pred.y),
width=float(pred.width),
height=float(pred.height),
confidence=float(pred.confidence),
class_name=pred.class_name
)
proto_detections.append(proto_detection)
return backend_pb2.DetectResponse(Detections=proto_detections)
except Exception as err:
print(f"Detection error: {err}")
return backend_pb2.DetectResponse()
def Status(self, request, context):
"""
A gRPC method that returns the status of the backend service.
Args:
request: A HealthMessage object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A StatusResponse object that contains the status information.
"""
state = backend_pb2.StatusResponse.READY if self.model is not None else backend_pb2.StatusResponse.UNINITIALIZED
return backend_pb2.StatusResponse(state=state)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
options=[
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
])
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("[RFDETR] Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("[RFDETR] Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the RFDETR gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
print(f"[RFDETR] startup: {args}", file=sys.stderr)
serve(args.addr)
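
For reference, a rough Go sketch of a client for the gRPC server above, assuming the generated code in github.com/mudler/LocalAI/pkg/grpc/proto exposes a NewBackendClient constructor and a Detect method for the Backend service defined in backend.proto; those generated names are assumptions, while DetectOptions and its Src field appear in the Go code later in this diff:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/mudler/LocalAI/pkg/grpc/proto" // assumption: generated client exposes NewBackendClient/Detect
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Address matches the server's default --addr above.
	conn, err := grpc.Dial("localhost:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := proto.NewBackendClient(conn)
	// Src carries the base64-encoded image, as the Detect handler above expects.
	res, err := client.Detect(context.Background(), &proto.DetectOptions{Src: "<base64 image data>"})
	if err != nil {
		log.Fatal(err)
	}
	for _, d := range res.Detections {
		fmt.Println(d.ClassName)
	}
}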

View File

@@ -1,19 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
# This is here because the Intel pip index is broken and returns 200 status codes for every package name; it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
installRequirements

View File

@@ -1,13 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
ensureVenv
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,7 +0,0 @@
rfdetr
opencv-python
accelerate
peft
inference
torch==2.7.1
optimum-quanto

View File

@@ -1,8 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.7.1+cu118
rfdetr
opencv-python
accelerate
inference
peft
optimum-quanto

View File

@@ -1,7 +0,0 @@
torch==2.7.1
rfdetr
opencv-python
accelerate
inference
peft
optimum-quanto

View File

@@ -1,9 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.3
torch==2.7.1+rocm6.3
torchvision==0.22.1+rocm6.3
rfdetr
opencv-python
accelerate
inference
peft
optimum-quanto

View File

@@ -1,13 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchvision==0.18.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools
rfdetr
inference
opencv-python
accelerate
peft
optimum-quanto

View File

@@ -1,3 +0,0 @@
grpcio==1.71.0
protobuf
grpcio-tools

View File

@@ -1,9 +0,0 @@
#!/bin/bash
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
startBackend $@

View File

@@ -1,11 +0,0 @@
#!/bin/bash
set -e
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then
source $backend_dir/common/libbackend.sh
else
source $backend_dir/../common/libbackend.sh
fi
runUnittests

View File

@@ -2,8 +2,8 @@ package application
import ( import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
) )
type Application struct { type Application struct {

View File

@@ -10,8 +10,8 @@ import (
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/internal"
coreStartup "github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -55,11 +55,11 @@ func New(opts ...config.AppOption) (*Application, error) {
} }
} }
if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil { if err := pkgStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models") log.Error().Err(err).Msg("error installing models")
} }
if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil { if err := pkgStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil {
log.Error().Err(err).Msg("error installing external backends") log.Error().Err(err).Msg("error installing external backends")
} }

View File

@@ -1,34 +0,0 @@
package backend
import (
"context"
"fmt"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)
func Detection(
sourceFile string,
loader *model.ModelLoader,
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig,
) (*proto.DetectResponse, error) {
opts := ModelOptions(backendConfig, appConfig)
detectionModel, err := loader.Load(opts...)
if err != nil {
return nil, err
}
defer loader.Close()
if detectionModel == nil {
return nil, fmt.Errorf("could not load detection model")
}
res, err := detectionModel.Detect(context.Background(), &proto.DetectOptions{
Src: sourceFile,
})
return res, err
}

View File

@@ -7,7 +7,7 @@ import (
model "github.com/mudler/LocalAI/pkg/model" model "github.com/mudler/LocalAI/pkg/model"
) )
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) { func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
opts := ModelOptions(backendConfig, appConfig) opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err := loader.Load( inferenceModel, err := loader.Load(
@@ -33,7 +33,6 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
Dst: dst, Dst: dst,
Src: src, Src: src,
EnableParameters: backendConfig.Diffusers.EnableParameters, EnableParameters: backendConfig.Diffusers.EnableParameters,
RefImages: refImages,
}) })
return err return err
} }

View File

@@ -8,7 +8,7 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/startup" "github.com/mudler/LocalAI/pkg/startup"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )

View File

@@ -9,8 +9,8 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/startup"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )

View File

@@ -25,6 +25,7 @@ type RunCMD struct {
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"` GeneratedContentPath string `env:"LOCALAI_GENERATED_CONTENT_PATH,GENERATED_CONTENT_PATH" type:"path" default:"/tmp/generated/content" help:"Location for generated content (e.g. images, audio, videos)" group:"storage"`
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"` UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"` LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"` LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
// The alias on this option is there to preserve functionality with the old `--config-file` parameter // The alias on this option is there to preserve functionality with the old `--config-file` parameter
@@ -87,6 +88,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel), config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
config.WithGeneratedContentDir(r.GeneratedContentPath), config.WithGeneratedContentDir(r.GeneratedContentPath),
config.WithUploadDir(r.UploadPath), config.WithUploadDir(r.UploadPath),
config.WithConfigsDir(r.ConfigPath),
config.WithDynamicConfigDir(r.LocalaiConfigDir), config.WithDynamicConfigDir(r.LocalaiConfigDir),
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16), config.WithF16(r.F16),

View File

@@ -72,7 +72,7 @@ func (u *CreateOCIImageCMD) Run(ctx *cliContext.Context) error {
} }
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error { func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
if len(u.Args) == 0 { if u.Args == nil || len(u.Args) == 0 {
return fmt.Errorf("no GGUF file provided") return fmt.Errorf("no GGUF file provided")
} }
// We try to guess only if we don't have a template defined already // We try to guess only if we don't have a template defined already
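
Both sides of the hunk above are equivalent: in Go, len of a nil slice is 0, so the explicit u.Args == nil check adds nothing. A minimal sketch:

package main

import "fmt"

func main() {
	var args []string // nil slice, e.g. no GGUF file passed on the command line
	fmt.Println(args == nil, len(args) == 0) // prints: true true
	// So a plain `if len(u.Args) == 0` already covers the case the longer
	// `if u.Args == nil || len(u.Args) == 0` spells out explicitly.
}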

View File

@@ -21,7 +21,8 @@ type ApplicationConfig struct {
Debug bool Debug bool
GeneratedContentDir string GeneratedContentDir string
UploadDir string ConfigsDir string
UploadDir string
DynamicConfigsDir string DynamicConfigsDir string
DynamicConfigsDirPollInterval time.Duration DynamicConfigsDirPollInterval time.Duration
@@ -301,6 +302,12 @@ func WithUploadDir(uploadDir string) AppOption {
} }
} }
func WithConfigsDir(configsDir string) AppOption {
return func(o *ApplicationConfig) {
o.ConfigsDir = configsDir
}
}
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
return func(o *ApplicationConfig) { return func(o *ApplicationConfig) {
o.DynamicConfigsDir = dynamicConfigsDir o.DynamicConfigsDir = dynamicConfigsDir

View File

@@ -458,7 +458,6 @@ const (
FLAG_TOKENIZE BackendConfigUsecases = 0b001000000000 FLAG_TOKENIZE BackendConfigUsecases = 0b001000000000
FLAG_VAD BackendConfigUsecases = 0b010000000000 FLAG_VAD BackendConfigUsecases = 0b010000000000
FLAG_VIDEO BackendConfigUsecases = 0b100000000000 FLAG_VIDEO BackendConfigUsecases = 0b100000000000
FLAG_DETECTION BackendConfigUsecases = 0b1000000000000
// Common Subsets // Common Subsets
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
@@ -480,7 +479,6 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
"FLAG_VAD": FLAG_VAD, "FLAG_VAD": FLAG_VAD,
"FLAG_LLM": FLAG_LLM, "FLAG_LLM": FLAG_LLM,
"FLAG_VIDEO": FLAG_VIDEO, "FLAG_VIDEO": FLAG_VIDEO,
"FLAG_DETECTION": FLAG_DETECTION,
} }
} }
@@ -574,12 +572,6 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
} }
} }
if (u & FLAG_DETECTION) == FLAG_DETECTION {
if c.Backend != "rfdetr" {
return false
}
}
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION { if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
if c.Backend != "transformers-musicgen" { if c.Backend != "transformers-musicgen" {
return false return false
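
GuessUsecases above treats BackendConfigUsecases as a bitmask: each usecase owns one bit, subsets like FLAG_LLM are unions, and a check such as (u & FLAG_X) == FLAG_X asks whether all bits of that usecase were requested. A minimal sketch; only the FLAG_TOKENIZE, FLAG_VAD and FLAG_VIDEO values come from the hunk above, the remaining bit values are assumed for illustration:

package main

import "fmt"

// BackendConfigUsecases modelled on the constants above; chat/completion/edit
// bit values below are illustrative assumptions.
type BackendConfigUsecases int

const (
	FLAG_CHAT       BackendConfigUsecases = 0b000000000001
	FLAG_COMPLETION BackendConfigUsecases = 0b000000000010
	FLAG_EDIT       BackendConfigUsecases = 0b000000000100
	FLAG_TOKENIZE   BackendConfigUsecases = 0b001000000000
	FLAG_VAD        BackendConfigUsecases = 0b010000000000
	FLAG_VIDEO      BackendConfigUsecases = 0b100000000000

	// Common subsets are unions of the single-purpose flags, as with FLAG_LLM above.
	FLAG_LLM = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)

func main() {
	requested := FLAG_CHAT | FLAG_TOKENIZE
	// The same mask test GuessUsecases uses: a usecase matches only when all of its bits are set.
	fmt.Println("chat:", (requested&FLAG_CHAT) == FLAG_CHAT)    // true
	fmt.Println("video:", (requested&FLAG_VIDEO) == FLAG_VIDEO) // false
	fmt.Println("llm:", (requested&FLAG_LLM) == FLAG_LLM)       // false: needs chat, completion and edit
}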

View File

@@ -2,8 +2,7 @@ package gallery
import ( import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/system" "github.com/mudler/LocalAI/core/system"
"github.com/rs/zerolog/log"
) )
// BackendMetadata represents the metadata stored in a JSON file for each installed backend // BackendMetadata represents the metadata stored in a JSON file for each installed backend
@@ -24,7 +23,6 @@ type GalleryBackend struct {
Metadata `json:",inline" yaml:",inline"` Metadata `json:",inline" yaml:",inline"`
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"` Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
URI string `json:"uri,omitempty" yaml:"uri,omitempty"` URI string `json:"uri,omitempty" yaml:"uri,omitempty"`
Mirrors []string `json:"mirrors,omitempty" yaml:"mirrors,omitempty"`
CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"` CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"`
} }
@@ -35,11 +33,9 @@ func (backend *GalleryBackend) FindBestBackendFromMeta(systemState *system.Syste
realBackend := backend.CapabilitiesMap[systemState.Capability(backend.CapabilitiesMap)] realBackend := backend.CapabilitiesMap[systemState.Capability(backend.CapabilitiesMap)]
if realBackend == "" { if realBackend == "" {
log.Debug().Str("backend", backend.Name).Str("reportedCapability", systemState.Capability(backend.CapabilitiesMap)).Msg("No backend found for reported capability")
return nil return nil
} }
log.Debug().Str("backend", backend.Name).Str("reportedCapability", systemState.Capability(backend.CapabilitiesMap)).Msg("Found backend for reported capability")
return backends.FindByName(realBackend) return backends.FindByName(realBackend)
} }
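
FindBestBackendFromMeta above resolves a meta-backend by looking up the detected system capability in its CapabilitiesMap, i.e. the same nvidia/intel/amd keys used by the capabilities: blocks in the gallery index earlier in this diff. A minimal sketch of that lookup, with capability detection stubbed out:

package main

import "fmt"

// resolveBackend sketches the CapabilitiesMap lookup: the detected capability
// ("nvidia", "intel", "amd", ...) selects the concrete backend name to install.
func resolveBackend(capabilitiesMap map[string]string, detected string) (string, bool) {
	name, ok := capabilitiesMap[detected]
	if !ok || name == "" {
		return "", false // no concrete backend published for this capability
	}
	return name, true
}

func main() {
	// Capabilities block for "diffusers-development" as listed in the gallery above.
	caps := map[string]string{
		"nvidia": "cuda12-diffusers-development",
		"intel":  "intel-sycl-f32-diffusers-development",
		"amd":    "rocm-diffusers-development",
	}
	if name, ok := resolveBackend(caps, "intel"); ok {
		fmt.Println("installing", name)
	}
}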

View File

@@ -8,9 +8,9 @@ import (
"time" "time"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@@ -146,18 +146,7 @@ func InstallBackend(basePath string, config *GalleryBackend, downloadStatus func
uri := downloader.URI(config.URI) uri := downloader.URI(config.URI)
if err := uri.DownloadFile(backendPath, "", 1, 1, downloadStatus); err != nil { if err := uri.DownloadFile(backendPath, "", 1, 1, downloadStatus); err != nil {
success := false return fmt.Errorf("failed to download backend %q: %v", config.URI, err)
// Try to download from mirrors
for _, mirror := range config.Mirrors {
if err := downloader.URI(mirror).DownloadFile(backendPath, "", 1, 1, downloadStatus); err == nil {
success = true
break
}
}
if !success {
return fmt.Errorf("failed to download backend %q: %v", config.URI, err)
}
} }
// Create metadata for the backend // Create metadata for the backend

View File

@@ -7,7 +7,7 @@ import (
"runtime" "runtime"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/system" "github.com/mudler/LocalAI/core/system"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"

View File

@@ -95,7 +95,7 @@ func FindGalleryElement[T GalleryElement](models []T, name string, basePath stri
if !strings.Contains(name, "@") { if !strings.Contains(name, "@") {
for _, m := range models { for _, m := range models {
if strings.EqualFold(strings.ToLower(m.GetName()), strings.ToLower(name)) { if strings.EqualFold(m.GetName(), name) {
model = m model = m
break break
} }
@@ -103,7 +103,7 @@ func FindGalleryElement[T GalleryElement](models []T, name string, basePath stri
} else { } else {
for _, m := range models { for _, m := range models {
if strings.EqualFold(strings.ToLower(name), strings.ToLower(fmt.Sprintf("%s@%s", m.GetGallery().Name, m.GetName()))) { if strings.EqualFold(name, fmt.Sprintf("%s@%s", m.GetGallery().Name, m.GetName())) {
model = m model = m
break break
} }
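
FindGalleryElement above matches either a bare element name or the <gallery>@<name> form, case-insensitively; note that strings.EqualFold is already case-insensitive, so the extra strings.ToLower calls on one side of the hunk are redundant. A minimal sketch of the matching rule; the gallery and model names are placeholders:

package main

import (
	"fmt"
	"strings"
)

// matches sketches the two lookup forms used by FindGalleryElement above:
// a bare element name, or "<gallery>@<name>" when the query contains "@".
func matches(query, galleryName, elementName string) bool {
	if strings.Contains(query, "@") {
		return strings.EqualFold(query, fmt.Sprintf("%s@%s", galleryName, elementName))
	}
	return strings.EqualFold(query, elementName)
}

func main() {
	fmt.Println(matches("LocalAI@my-model", "localai", "my-model")) // true: EqualFold ignores case
	fmt.Println(matches("my-model", "localai", "my-model"))         // true
	fmt.Println(matches("other@my-model", "localai", "my-model"))   // false: gallery must match
}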

View File

@@ -10,8 +10,8 @@ import (
"dario.cat/mergo" "dario.cat/mergo"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
lconfig "github.com/mudler/LocalAI/core/config" lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"

View File

@@ -10,8 +10,10 @@ import (
"github.com/dave-gray101/v2keyauth" "github.com/dave-gray101/v2keyauth"
"github.com/gofiber/websocket/v2" "github.com/gofiber/websocket/v2"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes" "github.com/mudler/LocalAI/core/http/routes"
@@ -197,6 +199,11 @@ func API(application *application.Application) (*fiber.App, error) {
router.Use(csrf.New()) router.Use(csrf.New())
} }
// Load config jsons
utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
galleryService := services.NewGalleryService(application.ApplicationConfig(), application.ModelLoader()) galleryService := services.NewGalleryService(application.ApplicationConfig(), application.ModelLoader())
err = galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader()) err = galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
if err != nil { if err != nil {

View File

@@ -34,7 +34,7 @@ func CreateBackendEndpointService(galleries []config.Gallery, backendPath string
// GetOpStatusEndpoint returns the job status // GetOpStatusEndpoint returns the job status
// @Summary Returns the job status // @Summary Returns the job status
// @Success 200 {object} services.GalleryOpStatus "Response" // @Success 200 {object} services.BackendOpStatus "Response"
// @Router /backends/jobs/{uuid} [get] // @Router /backends/jobs/{uuid} [get]
func (mgs *BackendEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { func (mgs *BackendEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
@@ -48,7 +48,7 @@ func (mgs *BackendEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) erro
// GetAllStatusEndpoint returns all the jobs status progress // GetAllStatusEndpoint returns all the jobs status progress
// @Summary Returns all the jobs status progress // @Summary Returns all the jobs status progress
// @Success 200 {object} map[string]services.GalleryOpStatus "Response" // @Success 200 {object} map[string]services.BackendOpStatus "Response"
// @Router /backends/jobs [get] // @Router /backends/jobs [get]
func (mgs *BackendEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { func (mgs *BackendEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
@@ -58,7 +58,7 @@ func (mgs *BackendEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) err
// ApplyBackendEndpoint installs a new backend to a LocalAI instance // ApplyBackendEndpoint installs a new backend to a LocalAI instance
// @Summary Install backends to LocalAI. // @Summary Install backends to LocalAI.
// @Param request body GalleryBackend true "query params" // @Param request body BackendModel true "query params"
// @Success 200 {object} schema.BackendResponse "Response" // @Success 200 {object} schema.BackendResponse "Response"
// @Router /backends/apply [post] // @Router /backends/apply [post]
func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) error { func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) error {

View File

@@ -1,59 +0,0 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
// DetectionEndpoint is the LocalAI Detection endpoint https://localai.io/docs/api-reference/detection
// @Summary Detects objects in the input image.
// @Param request body schema.DetectionRequest true "query params"
// @Success 200 {object} schema.DetectionResponse "Response"
// @Router /v1/detection [post]
func DetectionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DetectionRequest)
if !ok || input.Model == "" {
return fiber.ErrBadRequest
}
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
if !ok || cfg == nil {
return fiber.ErrBadRequest
}
log.Debug().Str("image", input.Image).Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Detection")
image, err := utils.GetContentURIAsBase64(input.Image)
if err != nil {
return err
}
res, err := backend.Detection(image, ml, appConfig, *cfg)
if err != nil {
return err
}
response := schema.DetectionResponse{
Detections: make([]schema.Detection, len(res.Detections)),
}
for i, detection := range res.Detections {
response.Detections[i] = schema.Detection{
X: detection.X,
Y: detection.Y,
Width: detection.Width,
Height: detection.Height,
ClassName: detection.ClassName,
}
}
return c.JSON(response)
}
}
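
The removed DetectionEndpoint above accepts a schema.DetectionRequest (a model name plus an image URI that is fetched and base64-encoded) and answers with a schema.DetectionResponse of bounding boxes. A hedged client-side sketch of that exchange; the route and Go field names come from the handler above, while the JSON tags and the model name are assumptions for illustration:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Shapes mirroring schema.DetectionRequest / schema.DetectionResponse as used above.
type detectionRequest struct {
	Model string `json:"model"` // assumed JSON tag
	Image string `json:"image"` // assumed JSON tag
}

type detection struct {
	X         float32 `json:"x"`
	Y         float32 `json:"y"`
	Width     float32 `json:"width"`
	Height    float32 `json:"height"`
	ClassName string  `json:"class_name"`
}

type detectionResponse struct {
	Detections []detection `json:"detections"`
}

func main() {
	body, _ := json.Marshal(detectionRequest{Model: "rfdetr", Image: "https://example.com/cat.jpg"})
	resp, err := http.Post("http://localhost:8080/v1/detection", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()

	var out detectionResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		fmt.Println(err)
		return
	}
	for _, d := range out.Detections {
		fmt.Printf("%s at (%.0f,%.0f) %.0fx%.0f\n", d.ClassName, d.X, d.Y, d.Width, d.Height)
	}
}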

View File

@@ -15,10 +15,9 @@ import (
) )
type ModelGalleryEndpointService struct { type ModelGalleryEndpointService struct {
galleries []config.Gallery galleries []config.Gallery
backendGalleries []config.Gallery modelPath string
modelPath string galleryApplier *services.GalleryService
galleryApplier *services.GalleryService
} }
type GalleryModel struct { type GalleryModel struct {
@@ -26,12 +25,11 @@ type GalleryModel struct {
gallery.GalleryModel gallery.GalleryModel
} }
func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGalleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService { func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
return ModelGalleryEndpointService{ return ModelGalleryEndpointService{
galleries: galleries, galleries: galleries,
backendGalleries: backendGalleries, modelPath: modelPath,
modelPath: modelPath, galleryApplier: galleryApplier,
galleryApplier: galleryApplier,
} }
} }
@@ -81,7 +79,6 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
ID: uuid.String(), ID: uuid.String(),
GalleryElementName: input.ID, GalleryElementName: input.ID,
Galleries: mgs.galleries, Galleries: mgs.galleries,
BackendGalleries: mgs.backendGalleries,
} }
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})

View File

@@ -0,0 +1,522 @@
package openai
import (
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/gofiber/fiber/v2"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
)
// ToolType defines a type for tool options
type ToolType string
const (
CodeInterpreter ToolType = "code_interpreter"
Retrieval ToolType = "retrieval"
Function ToolType = "function"
MaxCharacterInstructions = 32768
MaxCharacterDescription = 512
MaxCharacterName = 256
MaxToolsSize = 128
MaxFileIdSize = 20
MaxCharacterMetadataKey = 64
MaxCharacterMetadataValue = 512
)
type Tool struct {
Type ToolType `json:"type"`
}
// Assistant represents the structure of an assistant object from the OpenAI API.
type Assistant struct {
ID string `json:"id"` // The unique identifier of the assistant.
Object string `json:"object"` // Object type, which is "assistant".
Created int64 `json:"created"` // The time at which the assistant was created.
Model string `json:"model"` // The model ID used by the assistant.
Name string `json:"name,omitempty"` // The name of the assistant.
Description string `json:"description,omitempty"` // The description of the assistant.
Instructions string `json:"instructions,omitempty"` // The system instructions that the assistant uses.
Tools []Tool `json:"tools,omitempty"` // A list of tools enabled on the assistant.
FileIDs []string `json:"file_ids,omitempty"` // A list of file IDs attached to this assistant.
Metadata map[string]string `json:"metadata,omitempty"` // Set of key-value pairs attached to the assistant.
}
var (
Assistants = []Assistant{} // better to return empty array instead of "null"
AssistantsConfigFile = "assistants.json"
)
type AssistantRequest struct {
Model string `json:"model"`
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Instructions string `json:"instructions,omitempty"`
Tools []Tool `json:"tools,omitempty"`
FileIDs []string `json:"file_ids,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
}
// CreateAssistantEndpoint is the OpenAI Assistant API endpoint https://platform.openai.com/docs/api-reference/assistants/createAssistant
// @Summary Create an assistant with a model and instructions.
// @Param request body AssistantRequest true "query params"
// @Success 200 {object} Assistant "Response"
// @Router /v1/assistants [post]
func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
request := new(AssistantRequest)
if err := c.BodyParser(request); err != nil {
log.Warn().AnErr("Unable to parse AssistantRequest", err)
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
}
if !modelExists(cl, ml, request.Model) {
log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
return c.Status(fiber.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Model %q not found", request.Model)))
}
if request.Tools == nil {
request.Tools = []Tool{}
}
if request.FileIDs == nil {
request.FileIDs = []string{}
}
if request.Metadata == nil {
request.Metadata = make(map[string]string)
}
id := "asst_" + strconv.FormatInt(generateRandomID(), 10)
assistant := Assistant{
ID: id,
Object: "assistant",
Created: time.Now().Unix(),
Model: request.Model,
Name: request.Name,
Description: request.Description,
Instructions: request.Instructions,
Tools: request.Tools,
FileIDs: request.FileIDs,
Metadata: request.Metadata,
}
Assistants = append(Assistants, assistant)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
return c.Status(fiber.StatusOK).JSON(assistant)
}
}
var currentId int64 = 0
func generateRandomID() int64 {
atomic.AddInt64(&currentId, 1)
return currentId
}
// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistants https://platform.openai.com/docs/api-reference/assistants/listAssistants
// @Summary List available assistants
// @Param limit query int false "Limit the number of assistants returned"
// @Param order query string false "Order of assistants returned"
// @Param after query string false "Return assistants created after the given ID"
// @Param before query string false "Return assistants created before the given ID"
// @Success 200 {object} []Assistant "Response"
// @Router /v1/assistants [get]
func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
// Because we're altering the existing assistants list we should just duplicate it for now.
returnAssistants := Assistants
// Parse query parameters
limitQuery := c.Query("limit", "20")
orderQuery := c.Query("order", "desc")
afterQuery := c.Query("after")
beforeQuery := c.Query("before")
// Convert string limit to integer
limit, err := strconv.Atoi(limitQuery)
if err != nil {
return c.Status(http.StatusBadRequest).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Invalid limit query value: %s", limitQuery)))
}
// Sort assistants
sort.SliceStable(returnAssistants, func(i, j int) bool {
if orderQuery == "asc" {
return returnAssistants[i].Created < returnAssistants[j].Created
}
return returnAssistants[i].Created > returnAssistants[j].Created
})
// After and before cursors
if afterQuery != "" {
returnAssistants = filterAssistantsAfterID(returnAssistants, afterQuery)
}
if beforeQuery != "" {
returnAssistants = filterAssistantsBeforeID(returnAssistants, beforeQuery)
}
// Apply limit
if limit < len(returnAssistants) {
returnAssistants = returnAssistants[:limit]
}
return c.JSON(returnAssistants)
}
}
// filterAssistantsBeforeID keeps only those assistants whose ID comes before the given ID
// We assume that the assistants are already sorted
func filterAssistantsBeforeID(assistants []Assistant, id string) []Assistant {
idInt, err := strconv.Atoi(id)
if err != nil {
return assistants // Return original slice if invalid id format is provided
}
var filteredAssistants []Assistant
for _, assistant := range assistants {
aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_"))
if err != nil {
continue // Skip if invalid id in assistant
}
if aid < idInt {
filteredAssistants = append(filteredAssistants, assistant)
}
}
return filteredAssistants
}
// filterAssistantsAfterID keeps only those assistants whose ID comes after the given ID
// We assume that the assistants are already sorted
func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
idInt, err := strconv.Atoi(id)
if err != nil {
return assistants // Return original slice if invalid id format is provided
}
var filteredAssistants []Assistant
for _, assistant := range assistants {
aid, err := strconv.Atoi(strings.TrimPrefix(assistant.ID, "asst_"))
if err != nil {
continue // Skip if invalid id in assistant
}
if aid > idInt {
filteredAssistants = append(filteredAssistants, assistant)
}
}
return filteredAssistants
}
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
found = false
models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
if err != nil {
return
}
for _, model := range models {
if model == modelName {
found = true
return
}
}
return
}
// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistants https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
// @Summary Delete assistants
// @Success 200 {object} schema.DeleteAssistantResponse "Response"
// @Router /v1/assistants/{assistant_id} [delete]
func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
}
for i, assistant := range Assistants {
if assistant.ID == assistantID {
Assistants = append(Assistants[:i], Assistants[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
ID: assistantID,
Object: "assistant.deleted",
Deleted: true,
})
}
}
log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
ID: assistantID,
Object: "assistant.deleted",
Deleted: false,
})
}
}
// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistants https://platform.openai.com/docs/api-reference/assistants/getAssistant
// @Summary Get assistant data
// @Success 200 {object} Assistant "Response"
// @Router /v1/assistants/{assistant_id} [get]
func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
}
for _, assistant := range Assistants {
if assistant.ID == assistantID {
return c.Status(fiber.StatusOK).JSON(assistant)
}
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)))
}
}
type AssistantFile struct {
ID string `json:"id"`
Object string `json:"object"`
CreatedAt int64 `json:"created_at"`
AssistantID string `json:"assistant_id"`
}
var (
AssistantFiles []AssistantFile
AssistantsFileConfigFile = "assistantsFile.json"
)
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
request := new(schema.AssistantFileRequest)
if err := c.BodyParser(request); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
}
assistantID := c.Params("assistant_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
}
for i, assistant := range Assistants {
if assistant.ID == assistantID {
if len(assistant.FileIDs) > MaxFileIdSize {
return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("Max files %d for assistant %s reached.", MaxFileIdSize, assistant.Name))
}
for _, file := range UploadedFiles {
if file.ID == request.FileID {
// Update the entry in the shared slice (the loop variable is a copy) so the attachment persists
Assistants[i].FileIDs = append(Assistants[i].FileIDs, request.FileID)
assistantFile := AssistantFile{
ID: file.ID,
Object: "assistant.file",
CreatedAt: time.Now().Unix(),
AssistantID: assistant.ID,
}
AssistantFiles = append(AssistantFiles, assistantFile)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(assistantFile)
}
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find file_id: %s", request.FileID)))
}
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find %q", assistantID)))
}
}
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type ListAssistantFiles struct {
Data []schema.File
Object string
}
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
}
limitQuery := c.Query("limit", "20")
order := c.Query("order", "desc")
limit, err := strconv.Atoi(limitQuery)
if err != nil || limit < 1 || limit > 100 {
limit = 20 // Default to 20 if there's an error or the limit is out of bounds
}
// Sort files by CreatedAt depending on the order query parameter
if order == "asc" {
sort.Slice(AssistantFiles, func(i, j int) bool {
return AssistantFiles[i].CreatedAt < AssistantFiles[j].CreatedAt
})
} else { // default to "desc"
sort.Slice(AssistantFiles, func(i, j int) bool {
return AssistantFiles[i].CreatedAt > AssistantFiles[j].CreatedAt
})
}
// Limit the number of files returned
var limitedFiles []AssistantFile
hasMore := false
if len(AssistantFiles) > limit {
hasMore = true
limitedFiles = AssistantFiles[:limit]
} else {
limitedFiles = AssistantFiles
}
response := map[string]interface{}{
"object": "list",
"data": limitedFiles,
"first_id": func() string {
if len(limitedFiles) > 0 {
return limitedFiles[0].ID
}
return ""
}(),
"last_id": func() string {
if len(limitedFiles) > 0 {
return limitedFiles[len(limitedFiles)-1].ID
}
return ""
}(),
"has_more": hasMore,
}
return c.Status(fiber.StatusOK).JSON(response)
}
}
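The handler returns an OpenAI-style list envelope built as a plain map; a client could decode it into a small struct mirroring the keys used above (a sketch, not a type defined in this package):
type assistantFileList struct {
	Object  string          `json:"object"` // always "list"
	Data    []AssistantFile `json:"data"`
	FirstID string          `json:"first_id"`
	LastID  string          `json:"last_id"`
	HasMore bool            `json:"has_more"`
}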
func ModifyAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
request := new(AssistantRequest)
if err := c.BodyParser(request); err != nil {
log.Warn().Err(err).Msg("Unable to parse AssistantRequest")
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
}
assistantID := c.Params("assistant_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id is required")
}
for i, assistant := range Assistants {
if assistant.ID == assistantID {
newAssistant := Assistant{
ID: assistantID,
Object: assistant.Object,
Created: assistant.Created,
Model: request.Model,
Name: request.Name,
Description: request.Description,
Instructions: request.Instructions,
Tools: request.Tools,
FileIDs: request.FileIDs, // todo: should probably verify fileids exist
Metadata: request.Metadata,
}
// Remove old one and replace with new one
Assistants = append(Assistants[:i], Assistants[i+1:]...)
Assistants = append(Assistants, newAssistant)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
return c.Status(fiber.StatusOK).JSON(newAssistant)
}
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant with id: %s", assistantID)))
}
}
func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
fileId := c.Params("file_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required")
}
// First remove file from assistant
for i, assistant := range Assistants {
if assistant.ID == assistantID {
for j, attachedFileID := range assistant.FileIDs {
if attachedFileID != fileId {
continue // only detach the file_id requested in the route
}
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
// Check if the file exists in the AssistantFiles slice
for k, assistantFile := range AssistantFiles {
if assistantFile.ID == fileId {
// Remove the file from the AssistantFiles slice
AssistantFiles = append(AssistantFiles[:k], AssistantFiles[k+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
}
}
log.Warn().Msgf("Unable to locate file_id: %s in assistant: %s. Continuing to delete assistant file record.", fileId, assistantID)
for i, assistantFile := range AssistantFiles {
if assistantFile.AssistantID == assistantID {
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
}
}
}
log.Warn().Msgf("Unable to find assistant: %s", assistantID)
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: false,
})
}
}
func GetAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id")
fileId := c.Params("file_id")
if assistantID == "" {
return c.Status(fiber.StatusBadRequest).SendString("parameter assistant_id and file_id are required")
}
for _, assistantFile := range AssistantFiles {
if assistantFile.AssistantID == assistantID {
if assistantFile.ID == fileId {
return c.Status(fiber.StatusOK).JSON(assistantFile)
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with file_id: %s", fileId)))
}
}
return c.Status(fiber.StatusNotFound).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to find assistant file with assistant_id: %s", assistantID)))
}
}

View File

@@ -0,0 +1,460 @@
package openai
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
"github.com/stretchr/testify/assert"
)
var configsDir string = "/tmp/localai/configs"
type MockLoader struct {
models []string
}
func tearDown() func() {
return func() {
UploadedFiles = []schema.File{}
Assistants = []Assistant{}
AssistantFiles = []AssistantFile{}
_ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
_ = os.Remove(filepath.Join(configsDir, AssistantsFileConfigFile))
}
}
func TestAssistantEndpoints(t *testing.T) {
// Preparing the mocked objects
cl := &config.BackendConfigLoader{}
//configsDir := "/tmp/localai/configs"
modelPath := "/tmp/localai/model"
var ml = model.NewModelLoader(modelPath, false)
appConfig := &config.ApplicationConfig{
ConfigsDir: configsDir,
UploadLimitMB: 10,
UploadDir: "test_dir",
ModelPath: modelPath,
}
_ = os.RemoveAll(appConfig.ConfigsDir)
_ = os.MkdirAll(appConfig.ConfigsDir, 0750)
_ = os.MkdirAll(modelPath, 0750)
os.Create(filepath.Join(modelPath, "ggml-gpt4all-j"))
app := fiber.New(fiber.Config{
BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
})
// Create a Test Server
app.Get("/assistants", ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/assistants", CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/files", UploadFilesEndpoint(cl, appConfig))
app.Get("/assistants/:assistant_id/files", ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", GetAssistantFileEndpoint(cl, ml, appConfig))
t.Run("CreateAssistantEndpoint", func(t *testing.T) {
t.Cleanup(tearDown())
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
Description: "Test Assistant",
Instructions: "You are computer science teacher answering student questions",
Tools: []Tool{{Type: Function}},
FileIDs: nil,
Metadata: nil,
}
resultAssistant, resp, err := createAssistant(app, *ar)
assert.NoError(t, err)
assert.Equal(t, fiber.StatusOK, resp.StatusCode)
assert.Equal(t, 1, len(Assistants))
//t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID}))
assert.Equal(t, ar.Name, resultAssistant.Name)
assert.Equal(t, ar.Model, resultAssistant.Model)
assert.Equal(t, ar.Tools, resultAssistant.Tools)
assert.Equal(t, ar.Description, resultAssistant.Description)
assert.Equal(t, ar.Instructions, resultAssistant.Instructions)
assert.Equal(t, ar.FileIDs, resultAssistant.FileIDs)
assert.Equal(t, ar.Metadata, resultAssistant.Metadata)
})
t.Run("ListAssistantsEndpoint", func(t *testing.T) {
var ids []string
var resultAssistant []Assistant
for i := 0; i < 4; i++ {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: fmt.Sprintf("3.5-turbo-%d", i),
Description: fmt.Sprintf("Test Assistant - %d", i),
Instructions: fmt.Sprintf("You are computer science teacher answering student questions - %d", i),
Tools: []Tool{{Type: Function}},
FileIDs: []string{"fid-1234"},
Metadata: map[string]string{"meta": "data"},
}
//var err error
ra, _, err := createAssistant(app, *ar)
// Because we create the assistants so fast all end up with the same created time.
time.Sleep(time.Second)
resultAssistant = append(resultAssistant, ra)
assert.NoError(t, err)
ids = append(ids, resultAssistant[i].ID)
}
t.Cleanup(cleanupAllAssistants(t, app, ids))
tests := []struct {
name string
reqURL string
expectedStatus int
expectedResult []Assistant
expectedStringResult string
}{
{
name: "Valid Usage - limit only",
reqURL: "/assistants?limit=2",
expectedStatus: http.StatusOK,
expectedResult: Assistants[:2], // Expecting the first two assistants
},
{
name: "Valid Usage - order asc",
reqURL: "/assistants?order=asc",
expectedStatus: http.StatusOK,
expectedResult: Assistants, // Expecting all assistants in ascending order
},
{
name: "Valid Usage - order desc",
reqURL: "/assistants?order=desc",
expectedStatus: http.StatusOK,
expectedResult: []Assistant{Assistants[3], Assistants[2], Assistants[1], Assistants[0]}, // Expecting all assistants in descending order
},
{
name: "Valid Usage - after specific ID",
reqURL: "/assistants?after=2",
expectedStatus: http.StatusOK,
// Note this is correct because it's put in descending order already
expectedResult: Assistants[:3], // Expecting assistants after (excluding) ID 2
},
{
name: "Valid Usage - before specific ID",
reqURL: "/assistants?before=4",
expectedStatus: http.StatusOK,
expectedResult: Assistants[2:], // Expecting assistants before (excluding) ID 3.
},
{
name: "Invalid Usage - non-integer limit",
reqURL: "/assistants?limit=two",
expectedStatus: http.StatusBadRequest,
expectedStringResult: "Invalid limit query value: two",
},
{
name: "Invalid Usage - non-existing id in after",
reqURL: "/assistants?after=100",
expectedStatus: http.StatusOK,
expectedResult: []Assistant(nil), // Expecting empty list as there are no IDs above 100
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
request := httptest.NewRequest(http.MethodGet, tt.reqURL, nil)
response, err := app.Test(request)
assert.NoError(t, err)
assert.Equal(t, tt.expectedStatus, response.StatusCode)
if tt.expectedStatus != fiber.StatusOK {
all, _ := io.ReadAll(response.Body)
assert.Equal(t, tt.expectedStringResult, string(all))
} else {
var result []Assistant
err = json.NewDecoder(response.Body).Decode(&result)
assert.NoError(t, err)
assert.Equal(t, tt.expectedResult, result)
}
})
}
})
t.Run("DeleteAssistantEndpoint", func(t *testing.T) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
Description: "Test Assistant",
Instructions: "You are computer science teacher answering student questions",
Tools: []Tool{{Type: Function}},
FileIDs: nil,
Metadata: nil,
}
resultAssistant, _, err := createAssistant(app, *ar)
assert.NoError(t, err)
target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
deleteReq := httptest.NewRequest(http.MethodDelete, target, nil)
_, err = app.Test(deleteReq)
assert.NoError(t, err)
assert.Equal(t, 0, len(Assistants))
})
t.Run("GetAssistantEndpoint", func(t *testing.T) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
Description: "Test Assistant",
Instructions: "You are computer science teacher answering student questions",
Tools: []Tool{{Type: Function}},
FileIDs: nil,
Metadata: nil,
}
resultAssistant, _, err := createAssistant(app, *ar)
assert.NoError(t, err)
t.Cleanup(cleanupAllAssistants(t, app, []string{resultAssistant.ID}))
target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
request := httptest.NewRequest(http.MethodGet, target, nil)
response, err := app.Test(request)
assert.NoError(t, err)
var getAssistant Assistant
err = json.NewDecoder(response.Body).Decode(&getAssistant)
assert.NoError(t, err)
assert.Equal(t, resultAssistant.ID, getAssistant.ID)
})
t.Run("ModifyAssistantEndpoint", func(t *testing.T) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
Description: "Test Assistant",
Instructions: "You are computer science teacher answering student questions",
Tools: []Tool{{Type: Function}},
FileIDs: nil,
Metadata: nil,
}
resultAssistant, _, err := createAssistant(app, *ar)
assert.NoError(t, err)
modifiedAr := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "4.0-turbo",
Description: "Modified Test Assistant",
Instructions: "You are math teacher answering student questions",
Tools: []Tool{{Type: CodeInterpreter}},
FileIDs: nil,
Metadata: nil,
}
modifiedArJson, err := json.Marshal(modifiedAr)
assert.NoError(t, err)
target := fmt.Sprintf("/assistants/%s", resultAssistant.ID)
request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(modifiedArJson)))
request.Header.Set(fiber.HeaderContentType, "application/json")
modifyResponse, err := app.Test(request)
assert.NoError(t, err)
var getAssistant Assistant
err = json.NewDecoder(modifyResponse.Body).Decode(&getAssistant)
assert.NoError(t, err)
t.Cleanup(cleanupAllAssistants(t, app, []string{getAssistant.ID}))
assert.Equal(t, resultAssistant.ID, getAssistant.ID) // IDs should match even if contents change
assert.Equal(t, modifiedAr.Tools, getAssistant.Tools)
assert.Equal(t, modifiedAr.Name, getAssistant.Name)
assert.Equal(t, modifiedAr.Instructions, getAssistant.Instructions)
assert.Equal(t, modifiedAr.Description, getAssistant.Description)
})
t.Run("CreateAssistantFileEndpoint", func(t *testing.T) {
t.Cleanup(tearDown())
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
assert.Equal(t, assistant.ID, af.AssistantID)
})
t.Run("ListAssistantFilesEndpoint", func(t *testing.T) {
t.Cleanup(tearDown())
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
assert.Equal(t, assistant.ID, af.AssistantID)
})
t.Run("GetAssistantFileEndpoint", func(t *testing.T) {
t.Cleanup(tearDown())
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
target := fmt.Sprintf("/assistants/%s/files/%s", assistant.ID, file.ID)
request := httptest.NewRequest(http.MethodGet, target, nil)
response, err := app.Test(request)
assert.NoError(t, err)
var assistantFile AssistantFile
err = json.NewDecoder(response.Body).Decode(&assistantFile)
assert.NoError(t, err)
assert.Equal(t, af.ID, assistantFile.ID)
assert.Equal(t, af.AssistantID, assistantFile.AssistantID)
})
t.Run("DeleteAssistantFileEndpoint", func(t *testing.T) {
t.Cleanup(tearDown())
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
cleanupAssistantFile(t, app, af.ID, af.AssistantID)()
assert.Empty(t, AssistantFiles)
})
}
func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
Description: "Test Assistant",
Instructions: "You are computer science teacher answering student questions",
Tools: []Tool{{Type: Function}},
FileIDs: nil,
Metadata: nil,
}
assistant, _, err := createAssistant(app, *ar)
if err != nil {
return schema.File{}, Assistant{}, err
}
t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, o)
t.Cleanup(func() {
_, err := CallFilesDeleteEndpoint(t, app, file.ID)
assert.NoError(t, err)
})
return file, assistant, nil
}
func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
afrJson, err := json.Marshal(afr)
if err != nil {
return AssistantFile{}, nil, err
}
target := fmt.Sprintf("/assistants/%s/files", assistantId)
request := httptest.NewRequest(http.MethodPost, target, strings.NewReader(string(afrJson)))
request.Header.Set(fiber.HeaderContentType, "application/json")
request.Header.Set("OpenAi-Beta", "assistants=v1")
resp, err := app.Test(request)
if err != nil {
return AssistantFile{}, resp, err
}
var assistantFile AssistantFile
all, err := io.ReadAll(resp.Body)
if err != nil {
return AssistantFile{}, resp, err
}
err = json.NewDecoder(strings.NewReader(string(all))).Decode(&assistantFile)
if err != nil {
return AssistantFile{}, resp, err
}
return assistantFile, resp, nil
}
func createAssistant(app *fiber.App, ar AssistantRequest) (Assistant, *http.Response, error) {
assistant, err := json.Marshal(ar)
if err != nil {
return Assistant{}, nil, err
}
request := httptest.NewRequest(http.MethodPost, "/assistants", strings.NewReader(string(assistant)))
request.Header.Set(fiber.HeaderContentType, "application/json")
request.Header.Set("OpenAi-Beta", "assistants=v1")
resp, err := app.Test(request)
if err != nil {
return Assistant{}, resp, err
}
bodyString, err := io.ReadAll(resp.Body)
if err != nil {
return Assistant{}, resp, err
}
var resultAssistant Assistant
err = json.NewDecoder(strings.NewReader(string(bodyString))).Decode(&resultAssistant)
return resultAssistant, resp, err
}
func cleanupAllAssistants(t *testing.T, app *fiber.App, ids []string) func() {
return func() {
for _, assistant := range ids {
target := fmt.Sprintf("/assistants/%s", assistant)
deleteReq := httptest.NewRequest(http.MethodDelete, target, nil)
_, err := app.Test(deleteReq)
if err != nil {
t.Fatalf("Failed to delete assistant %s: %v", assistant, err)
}
}
}
}
func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId string) func() {
return func() {
target := fmt.Sprintf("/assistants/%s/files/%s", assistantId, fileId)
request := httptest.NewRequest(http.MethodDelete, target, nil)
request.Header.Set(fiber.HeaderContentType, "application/json")
request.Header.Set("OpenAi-Beta", "assistants=v1")
resp, err := app.Test(request)
assert.NoError(t, err)
var dafr schema.DeleteAssistantFileResponse
err = json.NewDecoder(resp.Body).Decode(&dafr)
assert.NoError(t, err)
assert.True(t, dafr.Deleted)
}
}

View File

@@ -15,8 +15,8 @@ import (
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/valyala/fasthttp" "github.com/valyala/fasthttp"
@@ -175,7 +175,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
textContentToReturn = "" textContentToReturn = ""
id = uuid.New().String() id = uuid.New().String()
created = int(time.Now().Unix()) created = int(time.Now().Unix())
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest) input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" { if !ok || input.Model == "" {
return fiber.ErrBadRequest return fiber.ErrBadRequest

View File

@@ -15,9 +15,9 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/valyala/fasthttp" "github.com/valyala/fasthttp"
) )

View File

@@ -12,8 +12,8 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -0,0 +1,194 @@
package openai
import (
"errors"
"fmt"
"os"
"path/filepath"
"sync/atomic"
"time"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/pkg/utils"
)
var UploadedFiles []schema.File
const UploadedFilesFile = "uploadedFiles.json"
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := c.FormFile("file")
if err != nil {
return err
}
// Check the file size
if file.Size > int64(appConfig.UploadLimitMB*1024*1024) {
return c.Status(fiber.StatusBadRequest).SendString(fmt.Sprintf("File size %d exceeds upload limit %d", file.Size, appConfig.UploadLimitMB))
}
purpose := c.FormValue("purpose", "") //TODO put in purpose dirs
if purpose == "" {
return c.Status(fiber.StatusBadRequest).SendString("Purpose is not defined")
}
// Sanitize the filename to prevent directory traversal
filename := utils.SanitizeFileName(file.Filename)
savePath := filepath.Join(appConfig.UploadDir, filename)
// Check if file already exists
if _, err := os.Stat(savePath); !os.IsNotExist(err) {
return c.Status(fiber.StatusBadRequest).SendString("File already exists")
}
err = c.SaveFile(file, savePath)
if err != nil {
return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + bluemonday.StrictPolicy().Sanitize(err.Error()))
}
f := schema.File{
ID: fmt.Sprintf("file-%d", getNextFileId()),
Object: "file",
Bytes: int(file.Size),
CreatedAt: time.Now(),
Filename: file.Filename,
Purpose: purpose,
}
UploadedFiles = append(UploadedFiles, f)
utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles)
return c.Status(fiber.StatusOK).JSON(f)
}
}
var currentFileId int64 = 0
func getNextFileId() int64 {
// Increment the dedicated file counter (not the assistant counter); returning AddInt64's result avoids a read race.
return atomic.AddInt64(&currentFileId, 1)
}
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
// @Summary List files.
// @Success 200 {object} schema.ListFiles "Response"
// @Router /v1/files [get]
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var listFiles schema.ListFiles
purpose := c.Query("purpose")
if purpose == "" {
listFiles.Data = UploadedFiles
} else {
for _, f := range UploadedFiles {
if purpose == f.Purpose {
listFiles.Data = append(listFiles.Data, f)
}
}
}
listFiles.Object = "list"
return c.Status(fiber.StatusOK).JSON(listFiles)
}
}
func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
id := c.Params("file_id")
if id == "" {
return nil, fmt.Errorf("file_id parameter is required")
}
for _, f := range UploadedFiles {
if id == f.ID {
return &f, nil
}
}
return nil, fmt.Errorf("unable to find file id %s", id)
}
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
// @Success 200 {object} schema.File "Response"
// @Router /v1/files/{file_id} [get]
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
if err != nil {
return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
}
return c.JSON(file)
}
}
type DeleteStatus struct {
Id string
Object string
Deleted bool
}
// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
// @Summary Delete a file.
// @Success 200 {object} DeleteStatus "Response"
// @Router /v1/files/{file_id} [delete]
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
if err != nil {
return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
}
err = os.Remove(filepath.Join(appConfig.UploadDir, file.Filename))
if err != nil {
// If the file doesn't exist then we should just continue to remove it
if !errors.Is(err, os.ErrNotExist) {
return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(fmt.Sprintf("Unable to delete file: %s, %v", file.Filename, err)))
}
}
// Remove upload from list
for i, f := range UploadedFiles {
if f.ID == file.ID {
UploadedFiles = append(UploadedFiles[:i], UploadedFiles[i+1:]...)
break
}
}
utils.SaveConfig(appConfig.UploadDir, UploadedFilesFile, UploadedFiles)
return c.JSON(DeleteStatus{
Id: file.ID,
Object: "file",
Deleted: true,
})
}
}
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
// @Summary Returns information about a specific file.
// @Success 200 {string} binary "file"
// @Router /v1/files/{file_id}/content [get]
// GetFilesContentsEndpoint
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
file, err := getFileFromRequest(c)
if err != nil {
return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
}
fileContents, err := os.ReadFile(filepath.Join(appConfig.UploadDir, file.Filename))
if err != nil {
return c.Status(fiber.StatusInternalServerError).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
}
return c.Send(fileContents)
}
}
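Taken together, the endpoints above implement a minimal OpenAI-compatible files API. A hedged client sketch exercising the upload and content routes (assumptions: a LocalAI instance on localhost:8080 and an "id" JSON tag on schema.File.ID, neither of which is shown in this diff):
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
)

func main() {
	// Build a multipart body with the "file" part and the required "purpose" field.
	var buf bytes.Buffer
	w := multipart.NewWriter(&buf)
	part, _ := w.CreateFormFile("file", "notes.txt")
	part.Write([]byte("hello"))
	w.WriteField("purpose", "fine-tune")
	w.Close()

	resp, err := http.Post("http://localhost:8080/v1/files", w.FormDataContentType(), &buf)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var uploaded struct {
		ID string `json:"id"` // assumed JSON tag for schema.File.ID
	}
	json.NewDecoder(resp.Body).Decode(&uploaded)

	// Retrieve the stored contents back.
	content, err := http.Get("http://localhost:8080/v1/files/" + uploaded.ID + "/content")
	if err != nil {
		panic(err)
	}
	defer content.Body.Close()
	data, _ := io.ReadAll(content.Body)
	fmt.Println(string(data))
}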

View File

@@ -0,0 +1,301 @@
package openai
import (
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
utils2 "github.com/mudler/LocalAI/pkg/utils"
"github.com/stretchr/testify/assert"
"testing"
)
func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
// Preparing the mocked objects
loader = &config.BackendConfigLoader{}
option = &config.ApplicationConfig{
UploadLimitMB: 10,
UploadDir: "test_dir",
}
_ = os.RemoveAll(option.UploadDir)
app = fiber.New(fiber.Config{
BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
})
// Create a Test Server
app.Post("/files", UploadFilesEndpoint(loader, option))
app.Get("/files", ListFilesEndpoint(loader, option))
app.Get("/files/:file_id", GetFilesEndpoint(loader, option))
app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option))
app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
return
}
func TestUploadFileExceedSizeLimit(t *testing.T) {
// Preparing the mocked objects
loader := &config.BackendConfigLoader{}
option := &config.ApplicationConfig{
UploadLimitMB: 10,
UploadDir: "test_dir",
}
_ = os.RemoveAll(option.UploadDir)
app := fiber.New(fiber.Config{
BodyLimit: 20 * 1024 * 1024, // sets the limit to 20MB.
})
// Create a Test Server
app.Post("/files", UploadFilesEndpoint(loader, option))
app.Get("/files", ListFilesEndpoint(loader, option))
app.Get("/files/:file_id", GetFilesEndpoint(loader, option))
app.Delete("/files/:file_id", DeleteFilesEndpoint(loader, option))
app.Get("/files/:file_id/content", GetFilesContentsEndpoint(loader, option))
t.Run("UploadFilesEndpoint file size exceeds limit", func(t *testing.T) {
t.Cleanup(tearDown())
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 11, option)
assert.NoError(t, err)
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
assert.Contains(t, bodyToString(resp, t), "exceeds upload limit")
})
t.Run("UploadFilesEndpoint purpose not defined", func(t *testing.T) {
t.Cleanup(tearDown())
resp, _ := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "", 5, option)
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
assert.Contains(t, bodyToString(resp, t), "Purpose is not defined")
})
t.Run("UploadFilesEndpoint file already exists", func(t *testing.T) {
t.Cleanup(tearDown())
f1 := CallFilesUploadEndpointWithCleanup(t, app, "foo.txt", "file", "fine-tune", 5, option)
resp, err := CallFilesUploadEndpoint(t, app, "foo.txt", "file", "fine-tune", 5, option)
fmt.Println(f1)
fmt.Printf("ERror: %v\n", err)
fmt.Printf("resp: %+v\n", resp)
assert.Equal(t, fiber.StatusBadRequest, resp.StatusCode)
assert.Contains(t, bodyToString(resp, t), "File already exists")
})
t.Run("UploadFilesEndpoint file uploaded successfully", func(t *testing.T) {
t.Cleanup(tearDown())
file := CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
// Check if file exists in the disk
testName := strings.Split(t.Name(), "/")[1]
fileName := testName + "-test.txt"
filePath := filepath.Join(option.UploadDir, utils2.SanitizeFileName(fileName))
_, err := os.Stat(filePath)
assert.False(t, os.IsNotExist(err))
assert.Equal(t, file.Bytes, 5242880)
assert.NotEmpty(t, file.CreatedAt)
assert.Equal(t, file.Filename, fileName)
assert.Equal(t, file.Purpose, "fine-tune")
})
t.Run("ListFilesEndpoint without purpose parameter", func(t *testing.T) {
t.Cleanup(tearDown())
resp, err := CallListFilesEndpoint(t, app, "")
assert.NoError(t, err)
assert.Equal(t, 200, resp.StatusCode)
listFiles := responseToListFile(t, resp)
if len(listFiles.Data) != len(UploadedFiles) {
t.Errorf("Expected %v files, got %v files", len(UploadedFiles), len(listFiles.Data))
}
})
t.Run("ListFilesEndpoint with valid purpose parameter", func(t *testing.T) {
t.Cleanup(tearDown())
_ = CallFilesUploadEndpointWithCleanup(t, app, "test.txt", "file", "fine-tune", 5, option)
resp, err := CallListFilesEndpoint(t, app, "fine-tune")
assert.NoError(t, err)
listFiles := responseToListFile(t, resp)
if len(listFiles.Data) != 1 {
t.Errorf("Expected 1 file, got %v files", len(listFiles.Data))
}
})
t.Run("ListFilesEndpoint with invalid query parameter", func(t *testing.T) {
t.Cleanup(tearDown())
resp, err := CallListFilesEndpoint(t, app, "not-so-fine-tune")
assert.NoError(t, err)
assert.Equal(t, 200, resp.StatusCode)
listFiles := responseToListFile(t, resp)
if len(listFiles.Data) != 0 {
t.Errorf("Expected 0 file, got %v files", len(listFiles.Data))
}
})
t.Run("GetFilesContentsEndpoint get file content", func(t *testing.T) {
t.Cleanup(tearDown())
req := httptest.NewRequest("GET", "/files", nil)
resp, _ := app.Test(req)
assert.Equal(t, 200, resp.StatusCode)
var listFiles schema.ListFiles
if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
t.Errorf("Failed to decode response: %v", err)
return
}
if len(listFiles.Data) != 0 {
t.Errorf("Expected 0 file, got %v files", len(listFiles.Data))
}
})
}
func CallListFilesEndpoint(t *testing.T, app *fiber.App, purpose string) (*http.Response, error) {
var target string
if purpose != "" {
target = fmt.Sprintf("/files?purpose=%s", purpose)
} else {
target = "/files"
}
req := httptest.NewRequest("GET", target, nil)
return app.Test(req)
}
func CallFilesContentEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) {
request := httptest.NewRequest("GET", "/files?file_id="+fileId, nil)
return app.Test(request)
}
func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) (*http.Response, error) {
testName := strings.Split(t.Name(), "/")[1]
// Create a file that exceeds the limit
file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
// Creating a new HTTP Request
body, writer := newMultipartFile(file.Name(), tag, purpose)
req := httptest.NewRequest(http.MethodPost, "/files", body)
req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType())
return app.Test(req)
}
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
// Create a file that exceeds the limit
testName := strings.Split(t.Name(), "/")[1]
file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
// Creating a new HTTP Request
body, writer := newMultipartFile(file.Name(), tag, purpose)
req := httptest.NewRequest(http.MethodPost, "/files", body)
req.Header.Set(fiber.HeaderContentType, writer.FormDataContentType())
resp, err := app.Test(req)
assert.NoError(t, err)
f := responseToFile(t, resp)
//id := f.ID
//t.Cleanup(func() {
// _, err := CallFilesDeleteEndpoint(t, app, id)
// assert.NoError(t, err)
// assert.Empty(t, UploadedFiles)
//})
return f
}
func CallFilesDeleteEndpoint(t *testing.T, app *fiber.App, fileId string) (*http.Response, error) {
target := fmt.Sprintf("/files/%s", fileId)
req := httptest.NewRequest(http.MethodDelete, target, nil)
return app.Test(req)
}
// Helper to create multi-part file
func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipart.Writer) {
body := new(strings.Builder)
writer := multipart.NewWriter(body)
file, _ := os.Open(filePath)
defer file.Close()
part, _ := writer.CreateFormFile(tag, filepath.Base(filePath))
io.Copy(part, file)
if purpose != "" {
_ = writer.WriteField("purpose", purpose)
}
writer.Close()
return strings.NewReader(body.String()), writer
}
// Helper to create test files
func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File {
err := os.MkdirAll(option.UploadDir, 0750)
if err != nil {
t.Fatalf("Error MKDIR: %v", err)
}
file, err := os.Create(name)
assert.NoError(t, err)
file.WriteString(strings.Repeat("a", sizeMB*1024*1024)) // sizeMB MB File
t.Cleanup(func() {
os.Remove(name)
os.RemoveAll(option.UploadDir)
})
return file
}
func bodyToString(resp *http.Response, t *testing.T) string {
return string(bodyToByteArray(resp, t))
}
func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
return bodyBytes
}
func responseToFile(t *testing.T, resp *http.Response) schema.File {
var file schema.File
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
if err != nil {
t.Errorf("Failed to decode response: %s", err)
}
return file
}
func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
var listFiles schema.ListFiles
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
if err != nil {
log.Error().Err(err).Msg("failed to decode response")
}
return listFiles
}

View File

@@ -79,37 +79,49 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
return fiber.ErrBadRequest return fiber.ErrBadRequest
} }
// Process input images (for img2img/inpainting)
src := "" 
if input.File != "" {
src = processImageFile(input.File, appConfig.GeneratedContentDir)
if src != "" {
defer os.RemoveAll(src)
}
}
// Process multiple input images
var inputImages []string
if len(input.Files) > 0 {
for _, file := range input.Files {
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
if processedFile != "" {
inputImages = append(inputImages, processedFile)
defer os.RemoveAll(processedFile)
}
}
}
// Process reference images
var refImages []string
if len(input.RefImages) > 0 {
for _, file := range input.RefImages {
processedFile := processImageFile(file, appConfig.GeneratedContentDir)
if processedFile != "" {
refImages = append(refImages, processedFile)
defer os.RemoveAll(processedFile)
}
}
}
src := ""
if input.File != "" {
fileData := []byte{}
var err error
// check if input.File is an URL, if so download it and save it
// to a temporary file
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
out, err := downloadFile(input.File)
if err != nil {
return fmt.Errorf("failed downloading file:%w", err)
}
defer os.RemoveAll(out)
fileData, err = os.ReadFile(out)
if err != nil {
return fmt.Errorf("failed reading file:%w", err)
}
} else {
// base 64 decode the file and write it somewhere
// that we will cleanup
fileData, err = base64.StdEncoding.DecodeString(input.File)
if err != nil {
return err
}
}
// Create a temporary file
outputFile, err := os.CreateTemp(appConfig.GeneratedContentDir, "b64")
if err != nil {
return err
}
// write the base64 result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
outputFile.Close()
return err
}
outputFile.Close()
src = outputFile.Name()
defer os.RemoveAll(src)
}
log.Debug().Msgf("Parameter Config: %+v", config) log.Debug().Msgf("Parameter Config: %+v", config)
@@ -190,13 +202,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
baseURL := c.BaseURL() baseURL := c.BaseURL()
// Use the first input image as src if available, otherwise use the original src
inputSrc := src
if len(inputImages) > 0 {
inputSrc = inputImages[0]
}
fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, inputSrc, output, ml, *config, appConfig, refImages)
fn, err := backend.ImageGeneration(height, width, mode, step, *config.Seed, positive_prompt, negative_prompt, src, output, ml, *config, appConfig)
if err != nil { if err != nil {
return err return err
} }
@@ -237,51 +243,3 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
return c.JSON(resp) return c.JSON(resp)
} }
} }
// processImageFile handles a single image file (URL or base64) and returns the path to the temporary file
func processImageFile(file string, generatedContentDir string) string {
fileData := []byte{}
var err error
// check if file is an URL, if so download it and save it to a temporary file
if strings.HasPrefix(file, "http://") || strings.HasPrefix(file, "https://") {
out, err := downloadFile(file)
if err != nil {
log.Error().Err(err).Msgf("Failed downloading file: %s", file)
return ""
}
defer os.RemoveAll(out)
fileData, err = os.ReadFile(out)
if err != nil {
log.Error().Err(err).Msgf("Failed reading downloaded file: %s", out)
return ""
}
} else {
// base 64 decode the file and write it somewhere that we will cleanup
fileData, err = base64.StdEncoding.DecodeString(file)
if err != nil {
log.Error().Err(err).Msgf("Failed decoding base64 file")
return ""
}
}
// Create a temporary file
outputFile, err := os.CreateTemp(generatedContentDir, "b64")
if err != nil {
log.Error().Err(err).Msg("Failed creating temporary file")
return ""
}
// write the base64 result
writer := bufio.NewWriter(outputFile)
_, err = writer.Write(fileData)
if err != nil {
outputFile.Close()
log.Error().Err(err).Msg("Failed writing to temporary file")
return ""
}
outputFile.Close()
return outputFile.Name()
}
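With this revert, the handler again reads input.File inline: an http(s) URL is downloaded to a temporary file, anything else is treated as raw base64 and decoded. A hypothetical client-side sketch of supplying such an input (the endpoint path, model name, and JSON field names are assumptions based on the OpenAI-style API, not taken from this diff):
package main

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	// The handler accepts input.File either as an http(s) URL or as a raw base64 string.
	imageBytes, _ := os.ReadFile("sketch.png")
	payload := map[string]any{
		"model":  "stablediffusion", // assumed model name
		"prompt": "a red apple on a table",
		"size":   "512x512",
		"file":   base64.StdEncoding.EncodeToString(imageBytes), // or "https://example.com/sketch.png"
	}
	body, _ := json.Marshal(payload)
	resp, err := http.Post("http://localhost:8080/v1/images/generations", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}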

View File

@@ -16,12 +16,12 @@ import (
"github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/openai/types" "github.com/mudler/LocalAI/core/http/endpoints/openai/types"
"github.com/mudler/LocalAI/core/templates"
laudio "github.com/mudler/LocalAI/pkg/audio" laudio "github.com/mudler/LocalAI/pkg/audio"
"github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/grpc/proto" "github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/sound" "github.com/mudler/LocalAI/pkg/sound"
"github.com/mudler/LocalAI/pkg/templates"
"google.golang.org/grpc" "google.golang.org/grpc"
@@ -29,8 +29,8 @@ import (
) )
const ( const (
localSampleRate = 16000 localSampleRate = 16000
remoteSampleRate = 24000 remoteSampleRate = 24000
) )
// A model can be "emulated" that is: transcribe audio to text -> feed text to the LLM -> generate audio as result // A model can be "emulated" that is: transcribe audio to text -> feed text to the LLM -> generate audio as result
@@ -210,9 +210,9 @@ func registerRealtime(application *application.Application) func(c *websocket.Co
// TODO: Need some way to pass this to the backend // TODO: Need some way to pass this to the backend
Threshold: 0.5, Threshold: 0.5,
// TODO: This is ignored and the amount of padding is random at present // TODO: This is ignored and the amount of padding is random at present
PrefixPaddingMs: 30, PrefixPaddingMs: 30,
SilenceDurationMs: 500, SilenceDurationMs: 500,
CreateResponse: func() *bool { t := true; return &t }(), CreateResponse: func() *bool { t := true; return &t }(),
}, },
}, },
InputAudioTranscription: &types.InputAudioTranscription{ InputAudioTranscription: &types.InputAudioTranscription{
@@ -233,7 +233,7 @@ func registerRealtime(application *application.Application) func(c *websocket.Co
// TODO: The API has no way to configure the VAD model or other models that make up a pipeline to fake any-to-any // TODO: The API has no way to configure the VAD model or other models that make up a pipeline to fake any-to-any
// So possibly we could have a way to configure a composite model that can be used in situations where any-to-any is expected // So possibly we could have a way to configure a composite model that can be used in situations where any-to-any is expected
pipeline := config.Pipeline{ pipeline := config.Pipeline{
VAD: "silero-vad", VAD: "silero-vad",
Transcription: session.InputAudioTranscription.Model, Transcription: session.InputAudioTranscription.Model,
} }
@@ -567,8 +567,8 @@ func updateTransSession(session *Session, update *types.ClientSession, cl *confi
trCur := session.InputAudioTranscription trCur := session.InputAudioTranscription
if trUpd != nil && trUpd.Model != "" && trUpd.Model != trCur.Model { if trUpd != nil && trUpd.Model != "" && trUpd.Model != trCur.Model {
pipeline := config.Pipeline{ pipeline := config.Pipeline {
VAD: "silero-vad", VAD: "silero-vad",
Transcription: trUpd.Model, Transcription: trUpd.Model,
} }
@@ -684,7 +684,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
sendEvent(c, types.InputAudioBufferClearedEvent{ sendEvent(c, types.InputAudioBufferClearedEvent{
ServerEventBase: types.ServerEventBase{ ServerEventBase: types.ServerEventBase{
EventID: "event_TODO", EventID: "event_TODO",
Type: types.ServerEventTypeInputAudioBufferCleared, Type: types.ServerEventTypeInputAudioBufferCleared,
}, },
}) })
@@ -697,7 +697,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
sendEvent(c, types.InputAudioBufferSpeechStartedEvent{ sendEvent(c, types.InputAudioBufferSpeechStartedEvent{
ServerEventBase: types.ServerEventBase{ ServerEventBase: types.ServerEventBase{
EventID: "event_TODO", EventID: "event_TODO",
Type: types.ServerEventTypeInputAudioBufferSpeechStarted, Type: types.ServerEventTypeInputAudioBufferSpeechStarted,
}, },
AudioStartMs: time.Now().Sub(startTime).Milliseconds(), AudioStartMs: time.Now().Sub(startTime).Milliseconds(),
}) })
@@ -719,7 +719,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
sendEvent(c, types.InputAudioBufferSpeechStoppedEvent{ sendEvent(c, types.InputAudioBufferSpeechStoppedEvent{
ServerEventBase: types.ServerEventBase{ ServerEventBase: types.ServerEventBase{
EventID: "event_TODO", EventID: "event_TODO",
Type: types.ServerEventTypeInputAudioBufferSpeechStopped, Type: types.ServerEventTypeInputAudioBufferSpeechStopped,
}, },
AudioEndMs: time.Now().Sub(startTime).Milliseconds(), AudioEndMs: time.Now().Sub(startTime).Milliseconds(),
}) })
@@ -728,9 +728,9 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
sendEvent(c, types.InputAudioBufferCommittedEvent{ sendEvent(c, types.InputAudioBufferCommittedEvent{
ServerEventBase: types.ServerEventBase{ ServerEventBase: types.ServerEventBase{
EventID: "event_TODO", EventID: "event_TODO",
Type: types.ServerEventTypeInputAudioBufferCommitted, Type: types.ServerEventTypeInputAudioBufferCommitted,
}, },
ItemID: generateItemID(), ItemID: generateItemID(),
PreviousItemID: "TODO", PreviousItemID: "TODO",
}) })
@@ -833,9 +833,9 @@ func commitUtterance(ctx context.Context, utt []byte, cfg *config.BackendConfig,
func runVAD(ctx context.Context, session *Session, adata []int16) ([]*proto.VADSegment, error) { func runVAD(ctx context.Context, session *Session, adata []int16) ([]*proto.VADSegment, error) {
soundIntBuffer := &audio.IntBuffer{ soundIntBuffer := &audio.IntBuffer{
Format: &audio.Format{SampleRate: localSampleRate, NumChannels: 1}, Format: &audio.Format{SampleRate: localSampleRate, NumChannels: 1},
SourceBitDepth: 16, SourceBitDepth: 16,
Data: sound.ConvertInt16ToInt(adata), Data: sound.ConvertInt16ToInt(adata),
} }
float32Data := soundIntBuffer.AsFloat32Buffer().Data float32Data := soundIntBuffer.AsFloat32Buffer().Data

View File

@@ -11,9 +11,9 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"

View File

@@ -23,7 +23,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
// LocalAI API endpoints // LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint { if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.BackendGalleries, appConfig.ModelPath, galleryService) modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
@@ -41,11 +41,6 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Get("/backends/jobs/:uuid", backendGalleryEndpointService.GetOpStatusEndpoint()) router.Get("/backends/jobs/:uuid", backendGalleryEndpointService.GetOpStatusEndpoint())
} }
router.Post("/v1/detection",
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_DETECTION)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DetectionRequest) }),
localai.DetectionEndpoint(cl, ml, appConfig))
router.Post("/tts", router.Post("/tts",
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)), requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }), requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),

View File

@@ -54,6 +54,38 @@ func RegisterOpenAIRoutes(app *fiber.App,
app.Post("/completions", completionChain...) app.Post("/completions", completionChain...)
app.Post("/v1/engines/:model/completions", completionChain...) app.Post("/v1/engines/:model/completions", completionChain...)
// assistant
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
// files
app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
// embeddings // embeddings
embeddingChain := []fiber.Handler{ embeddingChain := []fiber.Handler{
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)), re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),


@@ -180,7 +180,6 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
ID: uid, ID: uid,
GalleryElementName: galleryID, GalleryElementName: galleryID,
Galleries: appConfig.Galleries, Galleries: appConfig.Galleries,
BackendGalleries: appConfig.BackendGalleries,
} }
go func() { go func() {
galleryService.ModelGalleryChannel <- op galleryService.ModelGalleryChannel <- op
@@ -220,7 +219,6 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
Delete: true, Delete: true,
GalleryElementName: galleryName, GalleryElementName: galleryName,
Galleries: appConfig.Galleries, Galleries: appConfig.Galleries,
BackendGalleries: appConfig.BackendGalleries,
} }
go func() { go func() {
galleryService.ModelGalleryChannel <- op galleryService.ModelGalleryChannel <- op


@@ -90,14 +90,6 @@
hx-indicator=".htmx-indicator"> hx-indicator=".htmx-indicator">
<i class="fas fa-headphones mr-2"></i>Whisper <i class="fas fa-headphones mr-2"></i>Whisper
</button> </button>
<button hx-post="browse/search/backends"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-red-900/60 text-red-200 border border-red-700/50 hover:bg-red-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "object-detection"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-eye mr-2"></i>Object detection
</button>
</div> </div>
</div> </div>
</div> </div>


@@ -115,14 +115,6 @@
hx-indicator=".htmx-indicator"> hx-indicator=".htmx-indicator">
<i class="fas fa-headphones mr-2"></i>Audio transcription <i class="fas fa-headphones mr-2"></i>Audio transcription
</button> </button>
<button hx-post="browse/search/models"
class="inline-flex items-center rounded-full px-4 py-2 text-sm font-medium bg-red-900/60 text-red-200 border border-red-700/50 hover:bg-red-800 transition duration-200 ease-in-out"
hx-target="#search-results"
hx-vals='{"search": "object-detection"}'
onclick="hidePagination()"
hx-indicator=".htmx-indicator">
<i class="fas fa-eye mr-2"></i>Object detection
</button>
</div> </div>
</div> </div>


@@ -278,7 +278,6 @@ func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string
port, err := freeport.GetFreePort() port, err := freeport.GetFreePort()
if err != nil { if err != nil {
zlog.Error().Err(err).Msgf("Could not allocate a free port for %s", nd.ID) zlog.Error().Err(err).Msgf("Could not allocate a free port for %s", nd.ID)
cancel()
return return
} }


@@ -120,20 +120,3 @@ type SystemInformationResponse struct {
Backends []string `json:"backends"` Backends []string `json:"backends"`
Models []SysInfoModel `json:"loaded_models"` Models []SysInfoModel `json:"loaded_models"`
} }
type DetectionRequest struct {
BasicModelRequest
Image string `json:"image"`
}
type DetectionResponse struct {
Detections []Detection `json:"detections"`
}
type Detection struct {
X float32 `json:"x"`
Y float32 `json:"y"`
Width float32 `json:"width"`
Height float32 `json:"height"`
ClassName string `json:"class_name"`
}


@@ -2,6 +2,7 @@ package schema
import ( import (
"context" "context"
"time"
functions "github.com/mudler/LocalAI/pkg/functions" functions "github.com/mudler/LocalAI/pkg/functions"
) )
@@ -114,6 +115,37 @@ type OpenAIModel struct {
Object string `json:"object"` Object string `json:"object"`
} }
type DeleteAssistantResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
// File represents the structure of a file object from the OpenAI API.
type File struct {
ID string `json:"id"` // Unique identifier for the file
Object string `json:"object"` // Type of the object (e.g., "file")
Bytes int `json:"bytes"` // Size of the file in bytes
CreatedAt time.Time `json:"created_at"` // The time at which the file was created
Filename string `json:"filename"` // The name of the file
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
}
type ListFiles struct {
Data []File
Object string
}
type AssistantFileRequest struct {
FileID string `json:"file_id"`
}
type DeleteAssistantFileResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
type ImageGenerationResponseFormat string type ImageGenerationResponseFormat string
type ChatCompletionResponseFormatType string type ChatCompletionResponseFormatType string
@@ -141,10 +173,6 @@ type OpenAIRequest struct {
// whisper // whisper
File string `json:"file" validate:"required"` File string `json:"file" validate:"required"`
// Multiple input images for img2img or inpainting
Files []string `json:"files,omitempty"`
// Reference images for models that support them (e.g., Flux Kontext)
RefImages []string `json:"ref_images,omitempty"`
//whisper/image //whisper/image
ResponseFormat interface{} `json:"response_format,omitempty"` ResponseFormat interface{} `json:"response_format,omitempty"`
// image // image


@@ -2,7 +2,7 @@ package services
import ( import (
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/system" "github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
@@ -24,7 +24,6 @@ func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], s
g.modelLoader.DeleteExternalBackend(op.GalleryElementName) g.modelLoader.DeleteExternalBackend(op.GalleryElementName)
} else { } else {
log.Warn().Msgf("installing backend %s", op.GalleryElementName) log.Warn().Msgf("installing backend %s", op.GalleryElementName)
log.Debug().Msgf("backend galleries: %v", g.appConfig.BackendGalleries)
err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback, true) err = gallery.InstallBackendFromGallery(g.appConfig.BackendGalleries, systemState, op.GalleryElementName, g.appConfig.BackendsPath, progressCallback, true)
if err == nil { if err == nil {
err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader) err = gallery.RegisterBackends(g.appConfig.BackendsPath, g.modelLoader)


@@ -7,8 +7,8 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )


@@ -7,7 +7,7 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/system" "github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
) )


@@ -18,13 +18,6 @@ const (
nvidiaL4T = "nvidia-l4t" nvidiaL4T = "nvidia-l4t"
darwinX86 = "darwin-x86" darwinX86 = "darwin-x86"
metal = "metal" metal = "metal"
nvidia = "nvidia"
amd = "amd"
intel = "intel"
capabilityEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY"
capabilityRunFileEnv = "LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE"
defaultRunFile = "/run/localai/capability"
) )
func (s *SystemState) Capability(capMap map[string]string) string { func (s *SystemState) Capability(capMap map[string]string) string {
@@ -32,26 +25,21 @@ func (s *SystemState) Capability(capMap map[string]string) string {
// Check if the reported capability is in the map // Check if the reported capability is in the map
if _, exists := capMap[reportedCapability]; exists { if _, exists := capMap[reportedCapability]; exists {
log.Debug().Str("reportedCapability", reportedCapability).Any("capMap", capMap).Msg("Using reported capability")
return reportedCapability return reportedCapability
} }
log.Debug().Str("reportedCapability", reportedCapability).Any("capMap", capMap).Msg("The requested capability was not found, using default capability")
// Otherwise, return the default capability (catch-all) // Otherwise, return the default capability (catch-all)
return defaultCapability return defaultCapability
} }
func (s *SystemState) getSystemCapabilities() string { func (s *SystemState) getSystemCapabilities() string {
capability := os.Getenv(capabilityEnv) if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY") != "" {
if capability != "" { return os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY")
log.Info().Str("capability", capability).Msgf("Using forced capability from environment variable (%s)", capabilityEnv)
return capability
} }
capabilityRunFile := defaultRunFile capabilityRunFile := "/run/localai/capability"
capabilityRunFileEnv := os.Getenv(capabilityRunFileEnv) if os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE") != "" {
if capabilityRunFileEnv != "" { capabilityRunFile = os.Getenv("LOCALAI_FORCE_META_BACKEND_CAPABILITY_RUN_FILE")
capabilityRunFile = capabilityRunFileEnv
} }
// Check if /run/localai/capability exists and use it // Check if /run/localai/capability exists and use it
@@ -60,37 +48,31 @@ func (s *SystemState) getSystemCapabilities() string {
if _, err := os.Stat(capabilityRunFile); err == nil { if _, err := os.Stat(capabilityRunFile); err == nil {
capability, err := os.ReadFile(capabilityRunFile) capability, err := os.ReadFile(capabilityRunFile)
if err == nil { if err == nil {
log.Info().Str("capabilityRunFile", capabilityRunFile).Str("capability", string(capability)).Msgf("Using forced capability run file (%s)", capabilityRunFileEnv) return string(capability)
return strings.Trim(strings.TrimSpace(string(capability)), "\n")
} }
} }
// If we are on mac and arm64, we will return metal // If we are on mac and arm64, we will return metal
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
log.Info().Msgf("Using metal capability (arm64 on mac), set %s to override", capabilityEnv)
return metal return metal
} }
// If we are on mac and x86, we will return darwin-x86 // If we are on mac and x86, we will return darwin-x86
if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" { if runtime.GOOS == "darwin" && runtime.GOARCH == "amd64" {
log.Info().Msgf("Using darwin-x86 capability (amd64 on mac), set %s to override", capabilityEnv)
return darwinX86 return darwinX86
} }
// If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t // If arm64 on linux and a nvidia gpu is detected, we will return nvidia-l4t
if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" { if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
if s.GPUVendor == "nvidia" { if s.GPUVendor == "nvidia" {
log.Info().Msgf("Using nvidia-l4t capability (arm64 on linux), set %s to override", capabilityEnv)
return nvidiaL4T return nvidiaL4T
} }
} }
if s.GPUVendor == "" { if s.GPUVendor == "" {
log.Info().Msgf("Default capability (no GPU detected), set %s to override", capabilityEnv)
return defaultCapability return defaultCapability
} }
log.Info().Str("Capability", s.GPUVendor).Msgf("Capability automatically detected, set %s to override", capabilityEnv)
return s.GPUVendor return s.GPUVendor
} }
@@ -114,16 +96,18 @@ func detectGPUVendor() (string, error) {
if gpu.DeviceInfo.Vendor != nil { if gpu.DeviceInfo.Vendor != nil {
gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name) gpuVendorName := strings.ToUpper(gpu.DeviceInfo.Vendor.Name)
if strings.Contains(gpuVendorName, "NVIDIA") { if strings.Contains(gpuVendorName, "NVIDIA") {
return nvidia, nil return "nvidia", nil
} }
if strings.Contains(gpuVendorName, "AMD") { if strings.Contains(gpuVendorName, "AMD") {
return amd, nil return "amd", nil
} }
if strings.Contains(gpuVendorName, "INTEL") { if strings.Contains(gpuVendorName, "INTEL") {
return intel, nil return "intel", nil
} }
return "nvidia", nil
} }
} }
} }
return "", nil return "", nil


@@ -448,7 +448,7 @@ there are additional environment variables available that modify the behavior of
| Environment variable | Default | Description | | Environment variable | Default | Description |
|----------------------------|---------|------------------------------------------------------------------------------------------------------------| |----------------------------|---------|------------------------------------------------------------------------------------------------------------|
| `REBUILD` | `false` | Rebuild LocalAI on startup | | `REBUILD` | `false` | Rebuild LocalAI on startup |
| `BUILD_TYPE` | | Build type. Available: `cublas`, `openblas`, `clblas`, `intel` (intel core), `sycl_f16`, `sycl_f32` (intel backends) | | `BUILD_TYPE` | | Build type. Available: `cublas`, `openblas`, `clblas` |
| `GO_TAGS` | | Go tags. Available: `stablediffusion` | | `GO_TAGS` | | Go tags. Available: `stablediffusion` |
| `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend | | `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend |
| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start | | `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start |


@@ -15,16 +15,6 @@ This section contains instruction on how to use LocalAI with GPU acceleration.
For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}} {{% /alert %}}
## Automatic Backend Detection
When you install a model from the gallery (or a YAML file), LocalAI intelligently detects the required backend and your system's capabilities, then downloads the correct version for you. Whether you're running on a standard CPU, an NVIDIA GPU, an AMD GPU, or an Intel GPU, LocalAI handles it automatically.
For advanced use cases or to override auto-detection, you can use the `LOCALAI_FORCE_META_BACKEND_CAPABILITY` environment variable. Here are the available options:
- `default`: Forces CPU-only backend. This is the fallback if no specific hardware is detected.
- `nvidia`: Forces backends compiled with CUDA support for NVIDIA GPUs.
- `amd`: Forces backends compiled with ROCm support for AMD GPUs.
- `intel`: Forces backends compiled with SYCL/oneAPI support for Intel GPUs.
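As a minimal sketch (assuming a Docker-based setup on an NVIDIA host; adjust the image tag, ports, and flags to your environment), the override is passed as an environment variable:

```bash
# Force the NVIDIA (CUDA) backend variants regardless of what auto-detection would report
docker run -ti --name local-ai -p 8080:8080 --gpus all \
  -e LOCALAI_FORCE_META_BACKEND_CAPABILITY=nvidia \
  localai/localai:latest-gpu-nvidia-cuda-12
```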
## Model configuration ## Model configuration
@@ -81,8 +71,8 @@ To use CUDA, use the images with the `cublas` tag, for example.
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags): The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
- CUDA `11` tags: `master-gpu-nvidia-cuda-11`, `v1.40.0-gpu-nvidia-cuda-11`, ... - CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
- CUDA `12` tags: `master-gpu-nvidia-cuda-12`, `v1.40.0-gpu-nvidia-cuda-12`, ... - CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example: In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
@@ -267,7 +257,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
### Container images ### Container images
To use SYCL, use the images with `gpu-intel` in the tag, for example `{{< version >}}-gpu-intel`, ... To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16`, ...
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags). The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
@@ -276,7 +266,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example: To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
```bash ```bash
docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel phi-2 docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32 phi-2
``` ```
### Notes ### Notes
@@ -284,7 +274,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -
In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example: In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
```bash ```bash
docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16
``` ```
Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled. Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.


@@ -96,8 +96,8 @@ Your backend container should:
For getting started, see the available backends in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend . For getting started, see the available backends in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend .
- For Python based backends there is a template that can be used as starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template . - For Python based backends there is a template that can be used as starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template .
- For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark-cpp - For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark
- For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama-cpp - For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama
### Publishing Your Backend ### Publishing Your Backend


@@ -1,193 +0,0 @@
+++
disableToc = false
title = "🔍 Object detection"
weight = 13
url = "/features/object-detection/"
+++
LocalAI supports object detection through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Currently, [RF-DETR](https://github.com/roboflow/rf-detr) is available as an implementation.
## Overview
Object detection in LocalAI is implemented through dedicated backends that can identify and locate objects within images. Each backend provides different capabilities and model architectures.
**Key Features:**
- Real-time object detection
- High accuracy detection with bounding boxes
- Support for multiple hardware accelerators (CPU, NVIDIA GPU, Intel GPU, AMD GPU)
- Structured detection results with confidence scores
- Easy integration through the `/v1/detection` endpoint
## Usage
### Detection Endpoint
LocalAI provides a dedicated `/v1/detection` endpoint for object detection tasks. This endpoint is specifically designed for object detection and returns structured detection results with bounding boxes and confidence scores.
### API Reference
To perform object detection, send a POST request to the `/v1/detection` endpoint:
```bash
curl -X POST http://localhost:8080/v1/detection \
-H "Content-Type: application/json" \
-d '{
"model": "rfdetr-base",
"image": "https://media.roboflow.com/dog.jpeg"
}'
```
### Request Format
The request body should contain:
- `model`: The name of the object detection model (e.g., "rfdetr-base")
- `image`: The image to analyze, which can be:
- A URL to an image
- A base64-encoded image
### Response Format
The API returns a JSON response with detected objects:
```json
{
"detections": [
{
"x": 100.5,
"y": 150.2,
"width": 200.0,
"height": 300.0,
"confidence": 0.95,
"class_name": "dog"
},
{
"x": 400.0,
"y": 200.0,
"width": 150.0,
"height": 250.0,
"confidence": 0.87,
"class_name": "person"
}
]
}
```
Each detection includes:
- `x`, `y`: Coordinates of the bounding box top-left corner
- `width`, `height`: Dimensions of the bounding box
- `confidence`: Detection confidence score (0.0 to 1.0)
- `class_name`: The detected object class
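As a small usage sketch (assuming `jq` is available and the `rfdetr-base` model is installed), the fields can be extracted directly from the response:

```bash
# Print one "class_name confidence" pair per detected object
curl -s -X POST http://localhost:8080/v1/detection \
  -H "Content-Type: application/json" \
  -d '{"model": "rfdetr-base", "image": "https://media.roboflow.com/dog.jpeg"}' \
  | jq -r '.detections[] | "\(.class_name) \(.confidence)"'
```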
## Backends
### RF-DETR Backend
The RF-DETR backend is implemented as a Python-based gRPC service that integrates seamlessly with LocalAI. It provides object detection capabilities using the RF-DETR model architecture and supports multiple hardware configurations:
- **CPU**: Optimized for CPU inference
- **NVIDIA GPU**: CUDA acceleration for NVIDIA GPUs
- **Intel GPU**: Intel oneAPI optimization
- **AMD GPU**: ROCm acceleration for AMD GPUs
- **NVIDIA Jetson**: Optimized for ARM64 NVIDIA Jetson devices
#### Setup
1. **Using the Model Gallery (Recommended)**
The easiest way to get started is using the model gallery. The `rfdetr-base` model is available in the official LocalAI gallery:
```bash
# Install and run the rfdetr-base model
local-ai run rfdetr-base
```
You can also install it through the web interface by navigating to the Models section and searching for "rfdetr-base".
2. **Manual Configuration**
Create a model configuration file in your `models` directory:
```yaml
name: rfdetr
backend: rfdetr
parameters:
model: rfdetr-base
```
#### Available Models
Currently, the following model is available in the [Model Gallery]({{%relref "docs/features/model-gallery" %}}):
- **rfdetr-base**: Base model with balanced performance and accuracy
You can browse and install this model through the LocalAI web interface or using the command line.
## Examples
### Basic Object Detection
```bash
# Detect objects in an image from URL
curl -X POST http://localhost:8080/v1/detection \
-H "Content-Type: application/json" \
-d '{
"model": "rfdetr-base",
"image": "https://example.com/image.jpg"
}'
```
### Base64 Image Detection
```bash
# Convert image to base64 and send
base64_image=$(base64 -w 0 image.jpg)
curl -X POST http://localhost:8080/v1/detection \
-H "Content-Type: application/json" \
-d "{
\"model\": \"rfdetr-base\",
\"image\": \"data:image/jpeg;base64,$base64_image\"
}"
```
## Troubleshooting
### Common Issues
1. **Model Loading Errors**
- Ensure the model file is properly downloaded
- Check available disk space
- Verify model compatibility with your backend version
2. **Low Detection Accuracy**
- Ensure good image quality and lighting
- Check if objects are clearly visible
- Consider using a larger model for better accuracy
3. **Slow Performance**
- Enable GPU acceleration if available
- Use a smaller model for faster inference
- Optimize image resolution
### Debug Mode
Enable debug logging for troubleshooting:
```bash
local-ai run --debug rfdetr-base
```
## Object Detection Category
LocalAI includes a dedicated **object-detection** category for models and backends that specialize in identifying and locating objects within images. This category currently includes:
- **RF-DETR**: Real-time transformer-based object detection
Additional object detection models and backends will be added to this category in the future. You can filter models by the `object-detection` tag in the model gallery to find all available object detection models.
## Related Features
- [🎨 Image generation]({{%relref "docs/features/image-generation" %}}): Generate images with AI
- [📖 Text generation]({{%relref "docs/features/text-generation" %}}): Generate text with language models
- [🔍 GPT Vision]({{%relref "docs/features/gpt-vision" %}}): Analyze images with language models
- [🚀 GPU acceleration]({{%relref "docs/features/GPU-acceleration" %}}): Optimize performance with GPU acceleration


@@ -9,11 +9,13 @@ ico = "rocket_launch"
### Build ### Build
LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. The binary contains only the core backends written in Go and C++.
LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images contains also the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__ that allows to generate images and videos from text). LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images contains also the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__ that allows to generate images and videos from text).
This section contains instructions on how to build LocalAI from source. In some cases you might want to re-build LocalAI from source (for instance to leverage Apple Silicon acceleration), or to build a custom container image with your own backends. This section contains instructions on how to build LocalAI from source.
#### Build LocalAI locally #### Build LocalAI locally
@@ -22,6 +24,7 @@ This section contains instructions on how to build LocalAI from source.
In order to build LocalAI locally, you need the following requirements: In order to build LocalAI locally, you need the following requirements:
- Golang >= 1.21 - Golang >= 1.21
- Cmake/make
- GCC - GCC
- GRPC - GRPC
@@ -33,14 +36,20 @@ To install the dependencies follow the instructions below:
Install `xcode` from the App Store Install `xcode` from the App Store
```bash ```bash
brew install go protobuf protoc-gen-go protoc-gen-go-grpc wget brew install abseil cmake go grpc protobuf protoc-gen-go protoc-gen-go-grpc python wget
```
After installing the above dependencies, you need to install grpcio-tools from PyPI. You could do this via a pip --user install or a virtualenv.
```bash
pip install --user grpcio-tools
``` ```
{{% /tab %}} {{% /tab %}}
{{% tab tabName="Debian" %}} {{% tab tabName="Debian" %}}
```bash ```bash
apt install golang make protobuf-compiler-grpc apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-tools
``` ```
After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands
@@ -54,8 +63,10 @@ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f1
{{% /tab %}} {{% /tab %}}
{{% tab tabName="From source" %}} {{% tab tabName="From source" %}}
Specify `BUILD_GRPC_FOR_BACKEND_LLAMA=true` to automatically build the gRPC dependencies
```bash ```bash
make build make ... BUILD_GRPC_FOR_BACKEND_LLAMA=true build
``` ```
{{% /tab %}} {{% /tab %}}
@@ -72,6 +83,36 @@ make build
This should produce the binary `local-ai` This should produce the binary `local-ai`
Here is the list of the variables available that can be used to customize the build:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` |
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` |
| `CLBLAST_DIR` | | Specify a CLBlast directory |
| `CUDA_LIBPATH` | | Specify a CUDA library path |
| `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) |
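As an illustrative sketch (the exact combination depends on your hardware and needs), these variables are simply combined on the `make` command line:

```
# Example: CUDA acceleration with the tts Go tag enabled
make BUILD_TYPE=cublas GO_TAGS=tts build
```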
{{% alert note %}}
#### CPU flagset compatibility
LocalAI uses different backends based on ggml and llama.cpp to run models. If your CPU doesn't support common instruction sets, you can disable them during build:
```
CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" make build
```
For this to take effect in the container image, you need to set `REBUILD=true`:
```
docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
```
{{% /alert %}}
#### Container image #### Container image
Requirements: Requirements:
@@ -112,9 +153,6 @@ wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O
# Use a template from the examples # Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl
# Install the llama-cpp backend
./local-ai backends install llama-cpp
# Run LocalAI # Run LocalAI
./local-ai --models-path=./models/ --debug=true ./local-ai --models-path=./models/ --debug=true
@@ -148,53 +186,131 @@ sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
``` ```
# reinstall build dependencies # reinstall build dependencies
brew reinstall go grpc protobuf wget brew reinstall abseil cmake go grpc protobuf wget
make clean make clean
make build make build
``` ```
## Build backends **Requirements**: OpenCV, Gomp
LocalAI has several backends available for installation in the backend gallery. The backends can also be built from source. As backends vary in language and in the dependencies they require, the documentation provides generic guidance for a few of the backends, which can be applied with slight modifications to the others. Image generation requires `GO_TAGS=stablediffusion` to be set during build:
### Manually
Typically, each backend includes a Makefile which allows packaging the backend.
In the LocalAI repository, for instance you can build `bark-cpp` by doing:
``` ```
git clone https://github.com/go-skynet/LocalAI.git make GO_TAGS=stablediffusion build
# Build the bark-cpp backend (requires cmake)
make -C LocalAI/backend/go/bark-cpp build package
# Build vllm backend (requires python)
make -C LocalAI/backend/python/vllm
``` ```
### With Docker ### Build with Text to audio support
Building with Docker is simpler, as it abstracts away all the requirements and focuses on building the final OCI images that are available in the gallery. This also allows, for instance, building a backend locally and installing it with LocalAI. You can refer to [Backends](https://localai.io/backends/) for general guidance on how to install and develop backends. **Requirements**: piper-phonemize
In the LocalAI repository, you can build `bark-cpp` by doing: Text to audio support is experimental and requires `GO_TAGS=tts` to be set during build:
``` ```
git clone https://github.com/go-skynet/LocalAI.git make GO_TAGS=tts build
# Build the bark-cpp backend (requires docker)
make docker-build-bark-cpp
``` ```
Note that `make` is used only for convenience; in reality it just runs a simple `docker` command such as: ### Acceleration
#### OpenBLAS
Software acceleration.
Requirements: OpenBLAS
```
make BUILD_TYPE=openblas build
```
#### CuBLAS
Nvidia Acceleration.
Requirement: Nvidia CUDA toolkit
Note: CuBLAS support is experimental and has not been tested on real hardware. Please report any issues you find!
```
make BUILD_TYPE=cublas build
```
More information is available in the upstream PR: https://github.com/ggerganov/llama.cpp/pull/1412
#### Hipblas (AMD GPU with ROCm on Arch Linux)
Packages:
```
pacman -S base-devel git rocm-hip-sdk rocm-opencl-sdk opencv clblast grpc
```
Library links:
```
export CGO_CFLAGS="-I/usr/include/opencv4"
export CGO_CXXFLAGS="-I/usr/include/opencv4"
export CGO_LDFLAGS="-L/opt/rocm/hip/lib -lamdhip64 -L/opt/rocm/lib -lOpenCL -L/usr/lib -lclblast -lrocblas -lhipblas -lrocrand -lomp -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link"
```
Build:
```
make BUILD_TYPE=hipblas GPU_TARGETS=gfx1030
```
#### ClBLAS
AMD/Intel GPU acceleration.
Requirement: OpenCL, CLBlast
```
make BUILD_TYPE=clblas build
```
To specify a CLBlast directory, set `CLBLAST_DIR`.
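For example, a sketch with a hypothetical install location:

```
# Point the build at a custom CLBlast installation (the path below is illustrative)
make BUILD_TYPE=clblas CLBLAST_DIR=/opt/clblast build
```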
#### Intel GPU acceleration
Intel GPU acceleration is supported via SYCL.
Requirements: [Intel oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html) (see also [llama.cpp setup installations instructions](https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/README-sycl.md?plain=1#L56))
```
make BUILD_TYPE=sycl_f16 build # for float16
make BUILD_TYPE=sycl_f32 build # for float32
```
#### Metal (Apple Silicon)
```
make build
# correct build type is automatically used on mac (BUILD_TYPE=metal)
# Set `gpu_layers: 256` (or equal to the number of model layers) to your YAML model config file and `f16: true`
```
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
### Examples
More advanced build options are available, for instance to build only a single backend.
#### Build only a single backend
You can control the backends that are built by setting the `GRPC_BACKENDS` environment variable. For instance, to build only the `llama-cpp` backend:
```bash ```bash
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
``` ```
Note: By default, all the backends are built.
- BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`. #### Specific llama.cpp version
- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl
To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the desired tag or commit SHA:
```
CPPLLAMA_VERSION=<sha> make build
```


@@ -131,7 +131,8 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo
| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | | Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
| Latest images for Intel GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel` | `localai/localai:latest-aio-gpu-intel` | | Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |
### Available environment variables ### Available environment variables
@@ -162,9 +163,9 @@ Standard container images do not have pre-installed models.
| Description | Quay | Docker Hub | | Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------| | --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` | | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda11` | `localai/localai:master-gpu-nvidia-cuda11` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` | | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda11` |
{{% /tab %}} {{% /tab %}}
@@ -172,19 +173,29 @@ Standard container images do not have pre-installed models.
| Description | Quay | Docker Hub | | Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------| | --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-12` | `localai/localai:master-gpu-nvidia-cuda-12` | | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12` | `localai/localai:master-gpu-nvidia-cuda12` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-12` | | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda12` |
{{% /tab %}} {{% /tab %}}
{{% tab tabName="Intel GPU" %}} {{% tab tabName="Intel GPU (sycl f16)" %}}
| Description | Quay | Docker Hub | | Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------| | --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel` | `localai/localai:master-gpu-intel` | | Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel` | `localai/localai:latest-gpu-intel` | | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel` | `localai/localai:{{< version >}}-gpu-intel` | | Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16` |
{{% /tab %}}
{{% tab tabName="Intel GPU (sycl f32)" %}}
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32` |
{{% /tab %}} {{% /tab %}}


@@ -59,7 +59,11 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
#### Intel GPU Images (oneAPI): #### Intel GPU Images (oneAPI):
```bash ```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel # Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
``` ```
#### Vulkan GPU Images: #### Vulkan GPU Images:
@@ -81,7 +85,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
# Intel GPU version # Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
# AMD GPU version # AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
@@ -102,9 +106,6 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
local-ai run oci://localai/phi-2:latest local-ai run oci://localai/phi-2:latest
``` ```
{{% alert icon="⚡" %}}
**Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration]({{% relref "docs/features/gpu-acceleration#automatic-backend-detection" %}}).
{{% /alert %}}
For a full list of options, refer to the [Installer Options]({{% relref "docs/advanced/installer" %}}) documentation. For a full list of options, refer to the [Installer Options]({{% relref "docs/advanced/installer" %}}) documentation.
@@ -153,7 +154,7 @@ For instructions on using AIO images, see [Using container images]({{% relref "d
LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall. LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall.
[LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the software stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU). [LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the softwre stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU).
### Quick Start ### Quick Start

View File

@@ -1,3 +1,3 @@
{ {
"version": "v3.3.0" "version": "v3.1.1"
} }


@@ -660,7 +660,7 @@ install_docker() {
IMAGE_TAG= IMAGE_TAG=
if [ "$USE_VULKAN" = true ]; then if [ "$USE_VULKAN" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-gpu-vulkan IMAGE_TAG=${LOCALAI_VERSION}-vulkan
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
$SUDO docker run -v local-ai-data:/models \ $SUDO docker run -v local-ai-data:/models \
@@ -672,7 +672,7 @@ install_docker() {
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_CUDA" ]; then elif [ "$HAS_CUDA" ]; then
# Default to CUDA 12 # Default to CUDA 12
IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda-12 IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda12
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12 IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
@@ -715,10 +715,11 @@ install_docker() {
$envs \ $envs \
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_INTEL" ]; then elif [ "$HAS_INTEL" ]; then
IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel # Default to FP32 for better compatibility
IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
# AIO # AIO
if [ "$USE_AIO" = true ]; then if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
fi fi
info "Starting LocalAI Docker container..." info "Starting LocalAI Docker container..."
@@ -756,7 +757,7 @@ install_binary_darwin() {
[ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.' [ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.'
info "Downloading LocalAI ${LOCALAI_VERSION}..." info "Downloading LocalAI ${LOCALAI_VERSION}..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-${LOCALAI_VERSION}-darwin-${ARCH}" curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Darwin-${ARCH}"
info "Installing to /usr/local/bin/local-ai" info "Installing to /usr/local/bin/local-ai"
install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai
@@ -788,7 +789,7 @@ install_binary() {
fi fi
info "Downloading LocalAI ${LOCALAI_VERSION}..." info "Downloading LocalAI ${LOCALAI_VERSION}..."
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-${LOCALAI_VERSION}-linux-${ARCH}" curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Linux-${ARCH}"
for BINDIR in /usr/local/bin /usr/bin /bin; do for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue echo $PATH | grep -q $BINDIR && break || continue
@@ -867,7 +868,7 @@ OS="$(uname -s)"
ARCH=$(uname -m) ARCH=$(uname -m)
case "$ARCH" in case "$ARCH" in
x86_64) ARCH="amd64" ;; x86_64) ARCH="x86_64" ;;
aarch64|arm64) ARCH="arm64" ;; aarch64|arm64) ARCH="arm64" ;;
*) fatal "Unsupported architecture: $ARCH" ;; *) fatal "Unsupported architecture: $ARCH" ;;
esac esac


@@ -1,54 +1,4 @@
--- ---
- &afm
name: "arcee-ai_afm-4.5b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/6435718aaaef013d1aec3b8b/Lj9YVLIKKdImV_jID0A1g.png
license: aml
urls:
- https://huggingface.co/arcee-ai/AFM-4.5B
- https://huggingface.co/bartowski/arcee-ai_AFM-4.5B-GGUF
tags:
- gguf
- gpu
- text-generation
description: |
AFM-4.5B is a 4.5 billion parameter instruction-tuned model developed by Arcee.ai, designed for enterprise-grade performance across diverse deployment environments from cloud to edge. The base model was trained on a dataset of 8 trillion tokens, comprising 6.5 trillion tokens of general pretraining data followed by 1.5 trillion tokens of midtraining data with enhanced focus on mathematical reasoning and code generation. Following pretraining, the model underwent supervised fine-tuning on high-quality instruction datasets. The instruction-tuned model was further refined through reinforcement learning on verifiable rewards as well as for human preference. We use a modified version of TorchTitan for pretraining, Axolotl for supervised fine-tuning, and a modified version of Verifiers for reinforcement learning.
The development of AFM-4.5B prioritized data quality as a fundamental requirement for achieving robust model performance. We collaborated with DatologyAI, a company specializing in large-scale data curation. DatologyAI's curation pipeline integrates a suite of proprietary algorithms—model-based quality filtering, embedding-based curation, target distribution-matching, source mixing, and synthetic data. Their expertise enabled the creation of a curated dataset tailored to support strong real-world performance.
The model architecture follows a standard transformer decoder-only design based on Vaswani et al., incorporating several key modifications for enhanced performance and efficiency. Notable architectural features include grouped query attention for improved inference efficiency and ReLU^2 activation functions instead of SwiGLU to enable sparsification while maintaining or exceeding performance benchmarks.
The model available in this repo is the instruct model following supervised fine-tuning and reinforcement learning.
overrides:
parameters:
model: arcee-ai_AFM-4.5B-Q4_K_M.gguf
files:
- filename: arcee-ai_AFM-4.5B-Q4_K_M.gguf
sha256: f05516b323f581bebae1af2cbf900d83a2569b0a60c54366daf4a9c15ae30d4f
uri: huggingface://bartowski/arcee-ai_AFM-4.5B-GGUF/arcee-ai_AFM-4.5B-Q4_K_M.gguf
- &rfdetr
name: "rfdetr-base"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
license: apache-2.0
description: |
RF-DETR is a real-time, transformer-based object detection model architecture developed by Roboflow and released under the Apache 2.0 license.
RF-DETR is the first real-time model to exceed 60 AP on the Microsoft COCO benchmark alongside competitive performance at base sizes. It also achieves state-of-the-art performance on RF100-VL, an object detection benchmark that measures model domain adaptability to real-world problems. RF-DETR is the fastest and most accurate model of its size when compared to current real-time object detection models.
RF-DETR is small enough to run on the edge using Inference, making it an ideal model for deployments that need both strong accuracy and real-time performance.
tags:
- object-detection
- rfdetr
- gpu
- cpu
urls:
- https://github.com/roboflow/rf-detr
overrides:
backend: rfdetr
parameters:
model: rfdetr-base
known_usecases:
- detection
- name: "dream-org_dream-v0-instruct-7b" - name: "dream-org_dream-v0-instruct-7b"
# chatml # chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -1906,43 +1856,6 @@
- filename: Menlo_Lucy-128k-Q4_K_M.gguf - filename: Menlo_Lucy-128k-Q4_K_M.gguf
sha256: fb3e591cccc5d2821f3c615fd6dc2ca86d409f56fbc124275510a9612a90e61f sha256: fb3e591cccc5d2821f3c615fd6dc2ca86d409f56fbc124275510a9612a90e61f
uri: huggingface://bartowski/Menlo_Lucy-128k-GGUF/Menlo_Lucy-128k-Q4_K_M.gguf uri: huggingface://bartowski/Menlo_Lucy-128k-GGUF/Menlo_Lucy-128k-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen_qwen3-30b-a3b-instruct-2507"
urls:
- https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507
- https://huggingface.co/bartowski/Qwen_Qwen3-30B-A3B-Instruct-2507-GGUF
description: |
We introduce the updated version of the Qwen3-30B-A3B non-thinking mode, named Qwen3-30B-A3B-Instruct-2507, featuring the following key enhancements:
Significant improvements in general capabilities, including instruction following, logical reasoning, text comprehension, mathematics, science, coding and tool usage.
Substantial gains in long-tail knowledge coverage across multiple languages.
Markedly better alignment with user preferences in subjective and open-ended tasks, enabling more helpful responses and higher-quality text generation.
Enhanced capabilities in 256K long-context understanding.
overrides:
parameters:
model: Qwen_Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf
files:
- filename: Qwen_Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf
sha256: 382b4f5a164d200f93790ee0e339fae12852896d23485cfb203ce868fea33a95
uri: huggingface://bartowski/Qwen_Qwen3-30B-A3B-Instruct-2507-GGUF/Qwen_Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf
- !!merge <<: *qwen3
name: "qwen_qwen3-30b-a3b-thinking-2507"
urls:
- https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507
- https://huggingface.co/bartowski/Qwen_Qwen3-30B-A3B-Thinking-2507-GGUF
description: |
Over the past three months, we have continued to scale the thinking capability of Qwen3-30B-A3B, improving both the quality and depth of reasoning. We are pleased to introduce Qwen3-30B-A3B-Thinking-2507, featuring the following key enhancements:
Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, and academic benchmarks that typically require human expertise.
Markedly better general capabilities, such as instruction following, tool usage, text generation, and alignment with human preferences.
Enhanced 256K long-context understanding capabilities.
NOTE: This version has an increased thinking length. We strongly recommend its use in highly complex reasoning tasks.
overrides:
parameters:
model: Qwen_Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf
files:
- filename: Qwen_Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf
sha256: 1359aa08e2f2dfe7ce4b5ff88c4c996e6494c9d916b1ebacd214bb74bbd5a9db
uri: huggingface://bartowski/Qwen_Qwen3-30B-A3B-Thinking-2507-GGUF/Qwen_Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf
- &gemma3 - &gemma3
url: "github:mudler/LocalAI/gallery/gemma.yaml@master" url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
name: "gemma-3-27b-it" name: "gemma-3-27b-it"
@@ -19144,148 +19057,6 @@
overrides: overrides:
parameters: parameters:
model: SicariusSicariiStuff/flux.1dev-abliteratedv2 model: SicariusSicariiStuff/flux.1dev-abliteratedv2
- name: flux.1-kontext-dev
license: flux-1-dev-non-commercial-license
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
icon: https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev/media/main/teaser.png
description: |
FLUX.1 Kontext [dev] is a 12 billion parameter rectified flow transformer capable of editing images based on text instructions. For more information, please read our blog post and our technical report. You can find information about the [pro] version in here.
Key Features
Change existing images based on an edit instruction.
Have character, style and object reference without any finetuning.
Robust consistency allows users to refine an image through multiple successive edits with minimal visual drift.
Trained using guidance distillation, making FLUX.1 Kontext [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the FLUX.1 [dev] Non-Commercial License.
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev
- https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF
tags:
- image-to-image
- flux
- gpu
- cpu
overrides:
parameters:
model: flux1-kontext-dev-Q8_0.gguf
files:
- filename: "flux1-kontext-dev-Q8_0.gguf"
sha256: "ff2ff71c3755c8ab394398a412252c23382a83138b65190b16e736d457b80f73"
uri: "huggingface://QuantStack/FLUX.1-Kontext-dev-GGUF/flux1-kontext-dev-Q8_0.gguf"
- filename: ae.safetensors
sha256: afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
uri: https://huggingface.co/ChuckMcSneed/FLUX.1-dev/resolve/main/ae.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- !!merge <<: *flux
name: flux.1-dev-ggml-q8_0
license: flux-1-dev-non-commercial-license
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-dev
- https://huggingface.co/city96/FLUX.1-dev-gguf
overrides:
parameters:
model: flux1-dev-Q8_0.gguf
files:
- filename: "flux1-dev-Q8_0.gguf"
sha256: "129032f32224bf7138f16e18673d8008ba5f84c1ec74063bf4511a8bb4cf553d"
uri: "huggingface://city96/FLUX.1-dev-gguf/flux1-dev-Q8_0.gguf"
- filename: ae.safetensors
sha256: afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
uri: https://huggingface.co/ChuckMcSneed/FLUX.1-dev/resolve/main/ae.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- !!merge <<: *flux
name: flux.1-dev-ggml-abliterated-v2-q8_0
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
description: |
FLUX.1 [dev] is an abliterated version of FLUX.1 [dev]
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-dev
- https://huggingface.co/t8star/flux.1-dev-abliterated-V2-GGUF
overrides:
parameters:
model: T8-flux.1-dev-abliterated-V2-GGUF-Q8_0.gguf
files:
- filename: "T8-flux.1-dev-abliterated-V2-GGUF-Q8_0.gguf"
sha256: "aba8163ff644018da195212a1c33aeddbf802a0c2bba96abc584a2d0b6b42272"
uri: "huggingface://t8star/flux.1-dev-abliterated-V2-GGUF/T8-flux.1-dev-abliterated-V2-GGUF-Q8_0.gguf"
- filename: ae.safetensors
sha256: afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
uri: https://huggingface.co/ChuckMcSneed/FLUX.1-dev/resolve/main/ae.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- !!merge <<: *flux
name: flux.1-krea-dev-ggml
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
description: |
FLUX.1 Krea [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post and Krea's blog post.
Cutting-edge output quality, with a focus on aesthetic photography.
Competitive prompt following, matching the performance of closed source alternatives.
Trained using guidance distillation, making FLUX.1 Krea [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the flux-1-dev-non-commercial-license.
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev
- https://huggingface.co/QuantStack/FLUX.1-Krea-dev-GGUF
overrides:
parameters:
model: flux1-krea-dev-Q4_K_M.gguf
files:
- filename: "flux1-krea-dev-Q4_K_M.gguf"
sha256: "cf199b88509be2b3476a3372ff03eaaa662cb2b5d3710abf939ebb4838dbdcaf"
uri: "huggingface://QuantStack/FLUX.1-Krea-dev-GGUF/flux1-krea-dev-Q4_K_M.gguf"
- filename: ae.safetensors
sha256: afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
uri: https://huggingface.co/ChuckMcSneed/FLUX.1-dev/resolve/main/ae.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- !!merge <<: *flux
name: flux.1-krea-dev-ggml-q8_0
url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
description: |
FLUX.1 Krea [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post and Krea's blog post.
Cutting-edge output quality, with a focus on aesthetic photography.
Competitive prompt following, matching the performance of closed source alternatives.
Trained using guidance distillation, making FLUX.1 Krea [dev] more efficient.
Open weights to drive new scientific research, and empower artists to develop innovative workflows.
Generated outputs can be used for personal, scientific, and commercial purposes, as described in the flux-1-dev-non-commercial-license.
urls:
- https://huggingface.co/black-forest-labs/FLUX.1-Krea-dev
- https://huggingface.co/markury/FLUX.1-Krea-dev-gguf
overrides:
parameters:
model: flux1-krea-dev-Q8_0.gguf
files:
- filename: "flux1-krea-dev-Q8_0.gguf"
sha256: "0d085b1e3ae0b90e5dbf74da049a80a565617de622a147d28ee37a07761fbd90"
uri: "huggingface://markury/FLUX.1-Krea-dev-gguf/flux1-krea-dev-Q8_0.gguf"
- filename: ae.safetensors
sha256: afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38
uri: https://huggingface.co/ChuckMcSneed/FLUX.1-dev/resolve/main/ae.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
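The gallery entries above are built from a shared base through YAML anchors and merge keys (the !!merge <<: *flux lines). The sketch below, which uses the gopkg.in/yaml.v2 package already imported elsewhere in this diff, shows the mechanism on a reduced stand-in document rather than the real gallery file.
package main

import (
    "fmt"

    "gopkg.in/yaml.v2"
)

// A reduced stand-in for gallery entries: the first item defines the &flux
// anchor, the second reuses it through the "<<" merge key (written as
// "!!merge <<:" in the gallery) and overrides only what differs.
const doc = `
- &flux
  url: "github:mudler/LocalAI/gallery/flux-ggml.yaml@master"
  license: flux-1-dev-non-commercial-license
  tags:
    - image-to-image
    - flux
- <<: *flux
  name: flux.1-krea-dev-ggml-q8_0
  overrides:
    parameters:
      model: flux1-krea-dev-Q8_0.gguf
`

func main() {
    var entries []map[string]interface{}
    if err := yaml.Unmarshal([]byte(doc), &entries); err != nil {
        panic(err)
    }
    // The second entry inherits url, license and tags from the &flux anchor.
    fmt.Println(entries[1]["name"], entries[1]["license"], entries[1]["url"])
}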
- &whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" ## Whisper
name: "whisper-1"

View File

@@ -9,7 +9,7 @@ import (
var embeds = map[string]*embedBackend{}
func Provide(addr string, llm AIModel) { func Provide(addr string, llm LLM) {
embeds[addr] = &embedBackend{s: &server{llm: llm}}
}
@@ -42,7 +42,6 @@ type Backend interface {
GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)
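The client-side Backend interface above is what the rest of the codebase programs against. As a hedged illustration, the sketch below builds a small readiness probe on top of it, relying only on the Status method shown in this hunk; the import path is taken from the file layout in this diff and should be treated as an assumption.
package health

import (
    "context"
    "time"

    // Import path as laid out in this diff; treat it as an assumption.
    backendgrpc "github.com/mudler/LocalAI/pkg/grpc"
)

// WaitReady polls the backend's Status RPC until it answers or the deadline
// expires. It relies only on the Status method listed in the interface above.
func WaitReady(ctx context.Context, b backendgrpc.Backend, timeout time.Duration) error {
    ctx, cancel := context.WithTimeout(ctx, timeout)
    defer cancel()
    for {
        if _, err := b.Status(ctx); err == nil {
            return nil
        }
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(500 * time.Millisecond):
        }
    }
}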

View File

@@ -69,10 +69,6 @@ func (llm *Base) SoundGeneration(*pb.SoundGenerationRequest) error {
return fmt.Errorf("unimplemented")
}
func (llm *Base) Detect(*pb.DetectOptions) (pb.DetectResponse, error) {
return pb.DetectResponse{}, fmt.Errorf("unimplemented")
}
func (llm *Base) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
return pb.TokenizationResponse{}, fmt.Errorf("unimplemented")
}
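The Base type supplies "unimplemented" defaults so that a concrete backend only overrides the calls it supports. A self-contained sketch of that embedding pattern follows; the names are illustrative and do not come from LocalAI's packages.
package main

import (
    "errors"
    "fmt"
)

// Speaker mirrors a slice of the backend surface: TTS plus tokenization.
type Speaker interface {
    TTS(text string) error
    TokenizeString(s string) ([]int, error)
}

// Base provides "unimplemented" defaults, so a concrete backend only
// overrides the calls it actually supports.
type Base struct{}

func (Base) TTS(string) error                     { return errors.New("unimplemented") }
func (Base) TokenizeString(string) ([]int, error) { return nil, errors.New("unimplemented") }

// ttsOnly embeds Base and overrides just TTS.
type ttsOnly struct{ Base }

func (ttsOnly) TTS(text string) error {
    fmt.Println("speaking:", text)
    return nil
}

func main() {
    var s Speaker = ttsOnly{}
    _ = s.TTS("hello")
    if _, err := s.TokenizeString("hello"); err != nil {
        fmt.Println("tokenize:", err) // falls through to the Base default
    }
}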

View File

@@ -504,25 +504,3 @@ func (c *Client) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOp
client := pb.NewBackendClient(conn)
return client.VAD(ctx, in, opts...)
}
func (c *Client) Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
}
c.setBusy(true)
defer c.setBusy(false)
c.wdMark()
defer c.wdUnMark()
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithDefaultCallOptions(
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
))
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.Detect(ctx, in, opts...)
}

View File

@@ -59,10 +59,6 @@ func (e *embedBackend) SoundGeneration(ctx context.Context, in *pb.SoundGenerati
return e.s.SoundGeneration(ctx, in)
}
func (e *embedBackend) Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error) {
return e.s.Detect(ctx, in)
}
func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error) {
return e.s.AudioTranscription(ctx, in)
}

View File

@@ -4,7 +4,7 @@ import (
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type AIModel interface { type LLM interface {
Busy() bool
Lock()
Unlock()
@@ -15,7 +15,6 @@ type AIModel interface {
Embeddings(*pb.PredictOptions) ([]float32, error)
GenerateImage(*pb.GenerateImageRequest) error
GenerateVideo(*pb.GenerateVideoRequest) error
Detect(*pb.DetectOptions) (pb.DetectResponse, error)
AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
TTS(*pb.TTSRequest) error
SoundGeneration(*pb.SoundGenerationRequest) error

View File

@@ -22,7 +22,7 @@ import (
// server is used to implement helloworld.GreeterServer.
type server struct {
pb.UnimplementedBackendServer
llm AIModel llm LLM
}
func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, error) {
@@ -111,18 +111,6 @@ func (s *server) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequ
return &pb.Result{Message: "Sound Generation audio generated", Success: true}, nil
}
func (s *server) Detect(ctx context.Context, in *pb.DetectOptions) (*pb.DetectResponse, error) {
if s.llm.Locking() {
s.llm.Lock()
defer s.llm.Unlock()
}
res, err := s.llm.Detect(in)
if err != nil {
return nil, err
}
return &res, nil
}
func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest) (*pb.TranscriptResult, error) {
if s.llm.Locking() {
s.llm.Lock()
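The handlers in this file share one shape: take the model lock only when the backend asks for serialized access, invoke the model, and wrap the result in a protobuf response. A self-contained sketch of that pattern, generalized with a type parameter and using stand-in types rather than LocalAI's own, is shown below.
package main

import (
    "fmt"
    "sync"
)

// model mirrors the locking surface used by the handlers above.
type model struct {
    mu      sync.Mutex
    locking bool
}

func (m *model) Locking() bool { return m.locking }
func (m *model) Lock()         { m.mu.Lock() }
func (m *model) Unlock()       { m.mu.Unlock() }

// withLock reproduces the per-handler pattern: take the model lock only when
// the backend asks for serialized access, run the call, return its result.
func withLock[T any](m *model, call func() (T, error)) (T, error) {
    if m.Locking() {
        m.Lock()
        defer m.Unlock()
    }
    return call()
}

func main() {
    m := &model{locking: true}
    out, err := withLock(m, func() (string, error) { return "detections: []", nil })
    fmt.Println(out, err)
}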
@@ -263,7 +251,7 @@ func (s *server) VAD(ctx context.Context, in *pb.VADRequest) (*pb.VADResponse, e
return &res, nil
}
func StartServer(address string, model AIModel) error { func StartServer(address string, model LLM) error {
lis, err := net.Listen("tcp", address)
if err != nil {
return err
@@ -281,7 +269,7 @@ func StartServer(address string, model AIModel) error {
return nil
}
func RunServer(address string, model AIModel) (func() error, error) { func RunServer(address string, model LLM) (func() error, error) {
lis, err := net.Listen("tcp", address)
if err != nil {
return nil, err
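RunServer, unlike StartServer, hands the caller a shutdown closure instead of blocking. The following self-contained sketch mirrors that start-then-stop lifecycle with a bare google.golang.org/grpc server; LocalAI's backend registration is deliberately omitted, so treat it as an illustration of the signature rather than the project's implementation.
package main

import (
    "fmt"
    "net"

    "google.golang.org/grpc"
)

// runServer mirrors the shape of RunServer above: start serving immediately
// and hand back a stop function the caller can defer. Service registration is
// omitted; this only illustrates the lifecycle.
func runServer(address string) (func() error, error) {
    lis, err := net.Listen("tcp", address)
    if err != nil {
        return nil, err
    }
    s := grpc.NewServer()
    go func() {
        _ = s.Serve(lis)
    }()
    return func() error {
        s.GracefulStop()
        return nil
    }, nil
}

func main() {
    stop, err := runServer("127.0.0.1:0")
    if err != nil {
        panic(err)
    }
    defer stop()
    fmt.Println("backend gRPC server running")
}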

View File

@@ -8,8 +8,8 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/system"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -10,8 +10,8 @@ import (
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/system"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"

View File

@@ -6,7 +6,7 @@ import (
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/startup" . "github.com/mudler/LocalAI/pkg/startup"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"

View File

@@ -3,8 +3,8 @@ package templates_test
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
. "github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/templates"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"

View File

@@ -1,7 +1,7 @@
package templates_test
import (
. "github.com/mudler/LocalAI/core/templates" // Update with your module path . "github.com/mudler/LocalAI/pkg/templates" // Update with your module path
// Update with your module path
. "github.com/onsi/ginkgo/v2"

Some files were not shown because too many files have changed in this diff.