mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-11 02:07:27 -04:00
* dev knowledge.go structure Signed-off-by: Pete Chen <petechentw@gmail.com> * feat(agents): append KB source citations to responses Render structured KB citations as a Sources block after agent responses, linking each source to the existing raw collection entry endpoint. Keep long-term memory writes on the original model response so citation blocks do not get stored back into the knowledge base. Tested with: go test ./core/services/agents Assisted-by: Codex:gpt-5 Signed-off-by: Pete Chen <petechentw@gmail.com> * Collect KB citations from tool searches Signed-off-by: Pete Chen <petechentw@gmail.com> * fix(agents): append KB sources in local chats Apply the shared KB citation post-processing to standalone LocalAGI chat responses so the React agent chat receives the same clickable Sources block as the native executor path. Also fix the run target to use the current cmd/local-ai entrypoint. Assisted-by: Codex:gpt-5 Signed-off-by: Pete Chen <petechentw@gmail.com> --------- Signed-off-by: Pete Chen <petechentw@gmail.com> Co-authored-by: shihyunhuang <shihyunhuang88@gmail.com> Co-authored-by: TLoE419 <tloemizuchizu@gmail.com> Co-authored-by: Ching Kao <0980124jim@gmail.com>
1437 lines
70 KiB
Makefile
1437 lines
70 KiB
Makefile
# Disable parallel execution for backend builds
|
|
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio
|
|
|
|
GOCMD=go
|
|
GOTEST=$(GOCMD) test
|
|
GOVET=$(GOCMD) vet
|
|
BINARY_NAME=local-ai
|
|
LAUNCHER_BINARY_NAME=local-ai-launcher
|
|
|
|
UBUNTU_VERSION?=2404
|
|
UBUNTU_CODENAME?=noble
|
|
|
|
# Optional Ubuntu apt mirror overrides forwarded to docker builds.
|
|
# Empty = use upstream archive.ubuntu.com / security.ubuntu.com / ports.ubuntu.com.
|
|
# Set e.g. APT_MIRROR=http://azure.archive.ubuntu.com to route apt traffic
|
|
# during outages of the default Ubuntu pool.
|
|
APT_MIRROR?=
|
|
APT_PORTS_MIRROR?=
|
|
|
|
GORELEASER?=
|
|
|
|
export BUILD_TYPE?=
|
|
export CUDA_MAJOR_VERSION?=13
|
|
export CUDA_MINOR_VERSION?=0
|
|
|
|
GO_TAGS?=
|
|
BUILD_ID?=
|
|
NATIVE?=false
|
|
|
|
TEST_DIR=/tmp/test
|
|
|
|
TEST_FLAKES?=5
|
|
|
|
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
|
|
|
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
|
# go tool nm ./local-ai | grep Commit
|
|
LD_FLAGS?=-s -w
|
|
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
|
|
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
|
|
|
OPTIONAL_TARGETS?=
|
|
|
|
export OS := $(shell uname -s)
|
|
ARCH := $(shell uname -m)
|
|
GREEN := $(shell tput -Txterm setaf 2)
|
|
YELLOW := $(shell tput -Txterm setaf 3)
|
|
WHITE := $(shell tput -Txterm setaf 7)
|
|
CYAN := $(shell tput -Txterm setaf 6)
|
|
RESET := $(shell tput -Txterm sgr0)
|
|
|
|
# Default Docker bridge IP
|
|
E2E_BRIDGE_IP?=172.17.0.1
|
|
|
|
ifndef UNAME_S
|
|
UNAME_S := $(shell uname -s)
|
|
endif
|
|
|
|
ifeq ($(OS),Darwin)
|
|
ifeq ($(OSX_SIGNING_IDENTITY),)
|
|
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
|
endif
|
|
endif
|
|
|
|
# check if goreleaser exists
|
|
ifeq (, $(shell which goreleaser))
|
|
GORELEASER=curl -sfL https://goreleaser.com/static/run | bash -s --
|
|
else
|
|
GORELEASER=$(shell which goreleaser)
|
|
endif
|
|
|
|
TEST_PATHS?=./api/... ./pkg/... ./core/... ./backend/go/cloud-proxy/... ./backend/go/local-store/...
|
|
|
|
## Coverage output and the committed baseline that CI compares against.
|
|
## The gate is strict: total coverage must never decrease (no tolerance).
|
|
## covermode=atomic makes line coverage deterministic regardless of test
|
|
## ordering or flake retries, so there is no run-to-run jitter to absorb.
|
|
COVERAGE_DIR?=$(abspath ./coverage)
|
|
COVERAGE_PROFILE?=$(COVERAGE_DIR)/coverage.out
|
|
COVERAGE_BASELINE?=coverage-baseline.txt
|
|
## Coverage is collected one recursive root at a time and merged (see
|
|
## scripts/run-coverage.sh): passing several recursive roots to a single
|
|
## ginkgo invocation only keeps one root's coverprofile. Mirrors TEST_PATHS
|
|
## minus ./api (which doesn't exist).
|
|
COVERAGE_ROOTS?=./pkg ./core
|
|
## Build tags for the coverage build. `auth` is required to compile the real
|
|
## auth implementation and its ~150 `//go:build auth` tests (otherwise they're
|
|
## invisible and the gate scores auth against a stub). `debug` matches `test`.
|
|
COVERAGE_TAGS?=debug auth
|
|
## Coverage is attributed to these packages via --coverpkg, so the in-process
|
|
## integration suites (COVERAGE_E2E_ROOTS) credit the core/http handlers they
|
|
## drive over HTTP — not just their own test package.
|
|
COVERAGE_COVERPKG?=github.com/mudler/LocalAI/core/...,github.com/mudler/LocalAI/pkg/...
|
|
## In-process integration suites folded into coverage. Run non-recursively
|
|
## (excludes tests/e2e/distributed, which needs containers) with the mock
|
|
## backend built by prepare-test. real-models specs need a downloaded model,
|
|
## so they're filtered out. NOTE: tests/integration is intentionally NOT here —
|
|
## it needs the local-store backend built (`make backends/local-store`), which
|
|
## the coverage CI job doesn't do.
|
|
COVERAGE_E2E_ROOTS?=./tests/e2e
|
|
COVERAGE_E2E_LABELS?=!real-models
|
|
## Drop generated protobuf from the denominator (it has no tests by design).
|
|
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
|
|
|
|
|
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
|
|
|
all: help
|
|
|
|
## GENERIC
|
|
rebuild: ## Rebuilds the project
|
|
$(GOCMD) clean -cache
|
|
$(MAKE) build
|
|
|
|
clean: ## Remove build related file
|
|
$(GOCMD) clean -cache
|
|
rm -f prepare
|
|
rm -rf $(BINARY_NAME)
|
|
rm -rf release/
|
|
$(MAKE) protogen-clean
|
|
rmdir pkg/grpc/proto || true
|
|
|
|
clean-tests:
|
|
rm -rf test-models
|
|
rm -rf test-dir
|
|
rm -f tests/e2e/mock-backend/mock-backend
|
|
|
|
## Install Go tools
|
|
install-go-tools:
|
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
|
|
## React UI:
|
|
react-ui:
|
|
ifneq ($(wildcard core/http/react-ui/dist),)
|
|
@echo "react-ui dist already exists, skipping build"
|
|
else
|
|
cd core/http/react-ui && npm install && npm run build
|
|
endif
|
|
|
|
react-ui-docker:
|
|
docker run --entrypoint /bin/bash -v $(CURDIR):/app:z oven/bun:1 \
|
|
-c "cd /app/core/http/react-ui && bun install && bun run build"
|
|
|
|
core/http/react-ui/dist: react-ui
|
|
|
|
## Build:
|
|
|
|
build: protogen-go generate install-go-tools core/http/react-ui/dist ## Build the project
|
|
$(info ${GREEN}I local-ai build info:${RESET})
|
|
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
|
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
|
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
|
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
|
|
rm -rf $(BINARY_NAME) || true
|
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./cmd/local-ai
|
|
|
|
build-launcher: ## Build the launcher application
|
|
$(info ${GREEN}I local-ai launcher build info:${RESET})
|
|
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
|
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
|
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
|
rm -rf $(LAUNCHER_BINARY_NAME) || true
|
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(LAUNCHER_BINARY_NAME) ./cmd/launcher
|
|
|
|
build-all: build build-launcher ## Build both server and launcher
|
|
|
|
build-dev: ## Run LocalAI in dev mode with live reload
|
|
@command -v air >/dev/null 2>&1 || go install github.com/air-verse/air@latest
|
|
air -c .air.toml
|
|
|
|
dev-dist:
|
|
$(GORELEASER) build --snapshot --clean
|
|
|
|
dist:
|
|
$(GORELEASER) build --clean
|
|
|
|
osx-signed: build
|
|
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
|
|
|
## Run
|
|
run: ## run local-ai
|
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./cmd/local-ai
|
|
|
|
prepare-test: protogen-go build-mock-backend
|
|
|
|
########################################################
|
|
## Tests
|
|
########################################################
|
|
|
|
## Test targets
|
|
## After the test-suite reorg (see plans/test-reorg) the default `make test`
|
|
## no longer downloads multi-GB GGUF/whisper fixtures or builds llama-cpp /
|
|
## transformers / piper / whisper / stablediffusion-ggml. core/http/app_test.go
|
|
## now drives the mock-backend binary built by build-mock-backend; real-backend
|
|
## inference moved into tests/e2e-backends/ (per-backend, path-filtered) and
|
|
## tests/e2e-aio/ (nightly).
|
|
test: prepare-test
|
|
@echo 'Running tests'
|
|
export GO_TAGS="debug"
|
|
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
|
|
|
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
|
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
|
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
|
## uses covermode=atomic so the result is deterministic. Prints the total.
|
|
test-coverage: prepare-test
|
|
@echo 'Running tests with coverage'
|
|
GINKGO_TAGS="$(COVERAGE_TAGS)" \
|
|
COVERAGE_COVERPKG="$(COVERAGE_COVERPKG)" \
|
|
COVERAGE_E2E_ROOTS="$(COVERAGE_E2E_ROOTS)" \
|
|
COVERAGE_E2E_LABELS="$(COVERAGE_E2E_LABELS)" \
|
|
COVERAGE_EXCLUDE_RE='$(COVERAGE_EXCLUDE_RE)' \
|
|
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
|
scripts/run-coverage.sh $(COVERAGE_DIR) $(COVERAGE_PROFILE) $(TEST_FLAKES) $(COVERAGE_ROOTS)
|
|
@$(GOCMD) tool cover -html=$(COVERAGE_PROFILE) -o $(COVERAGE_DIR)/coverage.html
|
|
@$(GOCMD) tool cover -func=$(COVERAGE_PROFILE) | tail -n1
|
|
|
|
## Writes the current total coverage to $(COVERAGE_BASELINE). Run this (and
|
|
## commit the result) whenever a change legitimately raises coverage so the
|
|
## ratchet moves up. Never lower it by hand.
|
|
test-coverage-baseline: test-coverage
|
|
@$(GOCMD) tool cover -func=$(COVERAGE_PROFILE) | awk '/^total:/{gsub(/%/,"",$$NF); print $$NF}' > $(COVERAGE_BASELINE)
|
|
@echo "Saved coverage baseline: $$(cat $(COVERAGE_BASELINE))%"
|
|
|
|
## CI gate: fails if total coverage dropped more than COVERAGE_TOLERANCE
|
|
## (default 0.5pp) below the committed baseline. A small tolerance absorbs the
|
|
## run-to-run jitter from the in-process tests/e2e suite folded in via
|
|
## --coverpkg (timing-dependent which handler lines execute).
|
|
test-coverage-check: test-coverage
|
|
@scripts/coverage-check.sh $(COVERAGE_PROFILE) $(COVERAGE_BASELINE)
|
|
|
|
########################################################
|
|
## Lint
|
|
########################################################
|
|
## Runs golangci-lint with config from .golangci.yml. Includes the standard
|
|
## linter set plus forbidigo, which enforces the Ginkgo/Gomega-only test
|
|
## convention documented in .agents/coding-style.md.
|
|
##
|
|
## LINT_EXCLUDE_DIRS_RE matches directories whose Go packages can't typecheck
|
|
## without C/C++ headers we don't install in the lint runner (cgo wrappers
|
|
## around llama.cpp, piper/spdlog, silero-vad/onnxruntime, and Fyne/OpenGL for
|
|
## the launcher). Their compile-time correctness is enforced by their own
|
|
## build pipelines. Keep this as a deny list — `go list ./...` discovers
|
|
## everything else automatically, so new packages are scanned by default.
|
|
LINT_EXCLUDE_DIRS_RE=/(backend/go/(piper|silero-vad|llm)|cmd/launcher)(/|$$)
|
|
|
|
## Set LINT_NEW_FROM to a git ref to override .golangci.yml's
|
|
## new-from-merge-base (origin/master). Useful from a fork clone where
|
|
## origin/master is stale relative to the canonical repo — the pre-commit
|
|
## hook passes the resolved upstream ref here so local lint matches CI.
|
|
LINT_NEW_FROM?=
|
|
lint:
|
|
@command -v golangci-lint >/dev/null 2>&1 || { \
|
|
echo 'golangci-lint not installed. Install: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest'; \
|
|
exit 1; \
|
|
}
|
|
golangci-lint run $(if $(LINT_NEW_FROM),--new-from-merge-base=$(LINT_NEW_FROM),) $$(go list -e -f '{{.Dir}}' ./... | grep -vE '$(LINT_EXCLUDE_DIRS_RE)')
|
|
|
|
## Like `lint` but reports every issue, including the pre-existing baseline
|
|
## that `lint` ignores via .golangci.yml's new-from-merge-base. Use this to
|
|
## see what's available to clean up.
|
|
lint-all:
|
|
@command -v golangci-lint >/dev/null 2>&1 || { \
|
|
echo 'golangci-lint not installed. Install: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest'; \
|
|
exit 1; \
|
|
}
|
|
golangci-lint run --new=false --new-from-merge-base= --new-from-rev= $$(go list -e -f '{{.Dir}}' ./... | grep -vE '$(LINT_EXCLUDE_DIRS_RE)')
|
|
|
|
########################################################
|
|
## Git hooks
|
|
########################################################
|
|
## Points git at the versioned .githooks/ directory so the pre-commit hook
|
|
## (lint + coverage gate) runs locally. Run once per clone. Undo with:
|
|
## `git config --unset core.hooksPath`. Skip a single commit with
|
|
## `git commit --no-verify`.
|
|
install-hooks:
|
|
git config core.hooksPath .githooks
|
|
@echo 'Installed git hooks: core.hooksPath -> .githooks (pre-commit runs lint + test-coverage-check on Go changes)'
|
|
|
|
########################################################
|
|
## E2E AIO tests (uses standard image with pre-configured models)
|
|
########################################################
|
|
|
|
docker-build-e2e:
|
|
docker build \
|
|
--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
|
|
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
|
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
|
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
|
|
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
--build-arg GO_TAGS="$(GO_TAGS)" \
|
|
-t local-ai:tests -f Dockerfile .
|
|
|
|
e2e-aio:
|
|
LOCALAI_BACKEND_DIR=$(abspath ./backends) \
|
|
LOCALAI_MODELS_DIR=$(abspath ./tests/e2e-aio/models) \
|
|
LOCALAI_IMAGE_TAG=tests \
|
|
LOCALAI_IMAGE=local-ai \
|
|
$(MAKE) run-e2e-aio
|
|
|
|
run-e2e-aio: protogen-go
|
|
@echo 'Running e2e AIO tests'
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
|
|
|
# Distributed architecture e2e (PostgreSQL + NATS via testcontainers).
|
|
# Includes NatsJWT specs (JWT-enabled NATS). Requires Docker.
|
|
# VLLMMultinode is excluded here; use test-e2e-vllm-multinode for that.
|
|
test-e2e-distributed: protogen-go
|
|
@echo 'Running distributed e2e tests (label Distributed, incl. NatsJWT)'
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter='Distributed && !VLLMMultinode' --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e/distributed
|
|
|
|
# vLLM multi-node DP smoke (CPU). Builds local-ai:tests and the
|
|
# cpu-vllm backend from the current working tree, then drives a
|
|
# head + headless follower via testcontainers-go and asserts a chat
|
|
# completion. BuildKit caches both images, so re-runs only rebuild
|
|
# what changed. The test lives under tests/e2e/distributed and is
|
|
# selected by the VLLMMultinode label so it doesn't run alongside
|
|
# test-e2e-distributed.
|
|
test-e2e-vllm-multinode: docker-build-e2e extract-backend-vllm protogen-go
|
|
@echo 'Running e2e vLLM multi-node DP test'
|
|
LOCALAI_IMAGE=local-ai \
|
|
LOCALAI_IMAGE_TAG=tests \
|
|
LOCALAI_VLLM_BACKEND_DIR=$(abspath ./local-backends/vllm) \
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter='VLLMMultinode' -v -r ./tests/e2e/distributed
|
|
|
|
########################################################
|
|
## E2E tests
|
|
########################################################
|
|
|
|
prepare-e2e:
|
|
docker build \
|
|
--build-arg IMAGE_TYPE=core \
|
|
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
|
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
|
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
|
|
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
--build-arg GO_TAGS="$(GO_TAGS)" \
|
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
|
-t localai-tests .
|
|
|
|
run-e2e-image:
|
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --name e2e-tests-$(RANDOM) localai-tests
|
|
|
|
test-e2e: build-mock-backend build-cloud-proxy-backend prepare-e2e run-e2e-image
|
|
@echo 'Running e2e tests'
|
|
BUILD_TYPE=$(BUILD_TYPE) \
|
|
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390 \
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
|
|
$(MAKE) clean-mock-backend
|
|
$(MAKE) clean-cloud-proxy-backend
|
|
$(MAKE) teardown-e2e
|
|
docker rmi localai-tests
|
|
|
|
teardown-e2e:
|
|
rm -rf $(TEST_DIR) || true
|
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
|
|
|
########################################################
|
|
## Integration and unit tests
|
|
########################################################
|
|
|
|
## Storage / vector-store integration. Requires the local-store backend to
|
|
## be available — we build it on demand and pass its location via
|
|
## BACKENDS_PATH (the model loader looks there for the gRPC binary).
|
|
test-stores: backends/local-store
|
|
BACKENDS_PATH=$(abspath ./)/backends \
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r tests/integration
|
|
|
|
test-opus:
|
|
@echo 'Running opus backend tests'
|
|
$(MAKE) -C backend/go/opus libopusshim.so
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./backend/go/opus/...
|
|
|
|
test-opus-docker:
|
|
@echo 'Running opus backend tests in Docker'
|
|
docker build --target builder \
|
|
--build-arg BUILD_TYPE=$(or $(BUILD_TYPE),) \
|
|
--build-arg BASE_IMAGE=$(or $(BASE_IMAGE),ubuntu:24.04) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
--build-arg BACKEND=opus \
|
|
-t localai-opus-test -f backend/Dockerfile.golang .
|
|
docker run --rm localai-opus-test \
|
|
bash -c 'cd /LocalAI && go run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./backend/go/opus/...'
|
|
|
|
test-realtime: build-mock-backend
|
|
@echo 'Running realtime e2e tests (mock backend)'
|
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime && !real-models" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
|
|
|
|
# Container-based real-model realtime testing. Build env vars / pipeline
|
|
# definition kept here so test-realtime-models-docker can drive a fully wired
|
|
# pipeline (VAD + STT + LLM + TTS) from inside a containerised runner.
|
|
REALTIME_VAD?=silero-vad-ggml
|
|
REALTIME_STT?=whisper-1
|
|
REALTIME_LLM?=qwen3-0.6b
|
|
REALTIME_TTS?=tts-1
|
|
|
|
# --- Container-based real-model testing ---
|
|
|
|
REALTIME_BACKEND_NAMES ?= silero-vad whisper llama-cpp kokoro
|
|
REALTIME_MODELS_DIR ?= $(abspath ./models)
|
|
REALTIME_BACKENDS_DIR ?= $(abspath ./local-backends)
|
|
REALTIME_DOCKER_FLAGS ?= --gpus all
|
|
|
|
local-backends:
|
|
mkdir -p local-backends
|
|
|
|
extract-backend-%: docker-build-% local-backends
|
|
@echo "Extracting backend $*..."
|
|
@CID=$$(docker create --entrypoint=/run.sh local-ai-backend:$*) && \
|
|
rm -rf local-backends/$* && mkdir -p local-backends/$* && \
|
|
docker cp $$CID:/ - | tar -xf - -C local-backends/$* && \
|
|
docker rm $$CID > /dev/null
|
|
|
|
extract-realtime-backends: $(addprefix extract-backend-,$(REALTIME_BACKEND_NAMES))
|
|
|
|
test-realtime-models-docker: build-mock-backend
|
|
docker build --target build-requirements \
|
|
--build-arg BUILD_TYPE=$(or $(BUILD_TYPE),cublas) \
|
|
--build-arg CUDA_MAJOR_VERSION=$(or $(CUDA_MAJOR_VERSION),13) \
|
|
--build-arg CUDA_MINOR_VERSION=$(or $(CUDA_MINOR_VERSION),0) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
-t localai-test-runner .
|
|
docker run --rm \
|
|
$(REALTIME_DOCKER_FLAGS) \
|
|
-v $(abspath ./):/build \
|
|
-v $(REALTIME_MODELS_DIR):/models:ro \
|
|
-v $(REALTIME_BACKENDS_DIR):/backends \
|
|
-v localai-go-cache:/root/go/pkg/mod \
|
|
-v localai-go-build-cache:/root/.cache/go-build \
|
|
-e REALTIME_TEST_MODEL=$${REALTIME_TEST_MODEL:-realtime-test-pipeline} \
|
|
-e REALTIME_VAD=$(REALTIME_VAD) \
|
|
-e REALTIME_STT=$(REALTIME_STT) \
|
|
-e REALTIME_LLM=$(REALTIME_LLM) \
|
|
-e REALTIME_TTS=$(REALTIME_TTS) \
|
|
-e REALTIME_BACKENDS_PATH=/backends \
|
|
-e REALTIME_MODELS_PATH=/models \
|
|
-w /build \
|
|
localai-test-runner \
|
|
bash -c 'git config --global --add safe.directory /build && \
|
|
make protogen-go && make build-mock-backend && \
|
|
go run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e'
|
|
|
|
test-container:
|
|
docker build --target requirements -t local-ai-test-container .
|
|
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
|
|
|
|
########################################################
|
|
## Help
|
|
########################################################
|
|
|
|
## Help:
|
|
help: ## Show this help.
|
|
@echo ''
|
|
@echo 'Usage:'
|
|
@echo ' ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
|
|
@echo ''
|
|
@echo 'Targets:'
|
|
@awk 'BEGIN {FS = ":.*?## "} { \
|
|
if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
|
|
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
|
}' $(MAKEFILE_LIST)
|
|
|
|
########################################################
|
|
## Backends
|
|
########################################################
|
|
|
|
.PHONY: protogen
|
|
protogen: protogen-go
|
|
|
|
protoc:
|
|
@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
|
|
ARCH_NAME=$$(uname -m); \
|
|
if [ "$$OS_NAME" = "darwin" ]; then \
|
|
if [ "$$ARCH_NAME" = "arm64" ]; then \
|
|
FILE=protoc-31.1-osx-aarch_64.zip; \
|
|
elif [ "$$ARCH_NAME" = "x86_64" ]; then \
|
|
FILE=protoc-31.1-osx-x86_64.zip; \
|
|
else \
|
|
echo "Unsupported macOS architecture: $$ARCH_NAME"; exit 1; \
|
|
fi; \
|
|
elif [ "$$OS_NAME" = "linux" ]; then \
|
|
if [ "$$ARCH_NAME" = "x86_64" ]; then \
|
|
FILE=protoc-31.1-linux-x86_64.zip; \
|
|
elif [ "$$ARCH_NAME" = "aarch64" ] || [ "$$ARCH_NAME" = "arm64" ]; then \
|
|
FILE=protoc-31.1-linux-aarch_64.zip; \
|
|
elif [ "$$ARCH_NAME" = "ppc64le" ]; then \
|
|
FILE=protoc-31.1-linux-ppcle_64.zip; \
|
|
elif [ "$$ARCH_NAME" = "s390x" ]; then \
|
|
FILE=protoc-31.1-linux-s390_64.zip; \
|
|
elif [ "$$ARCH_NAME" = "i386" ] || [ "$$ARCH_NAME" = "x86" ]; then \
|
|
FILE=protoc-31.1-linux-x86_32.zip; \
|
|
else \
|
|
echo "Unsupported Linux architecture: $$ARCH_NAME"; exit 1; \
|
|
fi; \
|
|
else \
|
|
echo "Unsupported OS: $$OS_NAME"; exit 1; \
|
|
fi; \
|
|
URL=https://github.com/protocolbuffers/protobuf/releases/download/v31.1/$$FILE; \
|
|
curl -L $$URL -o protoc.zip && \
|
|
unzip -j -d $(CURDIR) protoc.zip bin/protoc && rm protoc.zip
|
|
|
|
.PHONY: protogen-go
|
|
protogen-go: protoc install-go-tools
|
|
mkdir -p pkg/grpc/proto
|
|
# install-go-tools writes protoc-gen-go and protoc-gen-go-grpc into
|
|
# $(shell go env GOPATH)/bin, which isn't on every dev's PATH. protoc
|
|
# resolves its code-gen plugins via PATH, so without this prefix the
|
|
# generate step fails with "protoc-gen-go: program not found". Prepend
|
|
# GOPATH/bin so the freshly-installed plugins win without requiring a
|
|
# shell-profile change.
|
|
PATH="$$(go env GOPATH)/bin:$$PATH" ./protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
|
backend/backend.proto
|
|
|
|
core/config/inference_defaults.json: ## Fetch inference defaults from unsloth (only if missing)
|
|
$(GOCMD) generate ./core/config/...
|
|
|
|
.PHONY: generate
|
|
generate: core/config/inference_defaults.json ## Ensure inference defaults exist
|
|
|
|
.PHONY: generate-force
|
|
generate-force: ## Re-fetch inference defaults from unsloth (always)
|
|
$(GOCMD) generate ./core/config/...
|
|
|
|
.PHONY: protogen-go-clean
|
|
protogen-go-clean:
|
|
$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
|
|
$(RM) bin/*
|
|
|
|
prepare-test-extra: protogen-python
|
|
$(MAKE) -C backend/python/transformers
|
|
$(MAKE) -C backend/python/outetts
|
|
$(MAKE) -C backend/python/diffusers
|
|
$(MAKE) -C backend/python/chatterbox
|
|
$(MAKE) -C backend/python/vllm
|
|
$(MAKE) -C backend/python/vllm-omni
|
|
$(MAKE) -C backend/python/sglang
|
|
$(MAKE) -C backend/python/vibevoice
|
|
$(MAKE) -C backend/python/liquid-audio
|
|
$(MAKE) -C backend/python/moonshine
|
|
$(MAKE) -C backend/python/pocket-tts
|
|
$(MAKE) -C backend/python/qwen-tts
|
|
$(MAKE) -C backend/python/fish-speech
|
|
$(MAKE) -C backend/python/faster-qwen3-tts
|
|
$(MAKE) -C backend/python/qwen-asr
|
|
$(MAKE) -C backend/python/nemo
|
|
$(MAKE) -C backend/python/voxcpm
|
|
$(MAKE) -C backend/python/faster-whisper
|
|
$(MAKE) -C backend/python/whisperx
|
|
$(MAKE) -C backend/python/ace-step
|
|
$(MAKE) -C backend/python/trl
|
|
$(MAKE) -C backend/python/tinygrad
|
|
$(MAKE) -C backend/python/insightface
|
|
$(MAKE) -C backend/python/speaker-recognition
|
|
$(MAKE) -C backend/rust/kokoros kokoros-grpc
|
|
$(MAKE) -C backend/go/rfdetr-cpp
|
|
|
|
test-extra: prepare-test-extra
|
|
$(MAKE) -C backend/python/transformers test
|
|
$(MAKE) -C backend/python/outetts test
|
|
$(MAKE) -C backend/python/diffusers test
|
|
$(MAKE) -C backend/python/chatterbox test
|
|
$(MAKE) -C backend/python/vllm test
|
|
$(MAKE) -C backend/python/vllm-omni test
|
|
$(MAKE) -C backend/python/vibevoice test
|
|
$(MAKE) -C backend/python/liquid-audio test
|
|
$(MAKE) -C backend/python/moonshine test
|
|
$(MAKE) -C backend/python/pocket-tts test
|
|
$(MAKE) -C backend/python/qwen-tts test
|
|
$(MAKE) -C backend/python/fish-speech test
|
|
$(MAKE) -C backend/python/faster-qwen3-tts test
|
|
$(MAKE) -C backend/python/qwen-asr test
|
|
$(MAKE) -C backend/python/nemo test
|
|
$(MAKE) -C backend/python/voxcpm test
|
|
$(MAKE) -C backend/python/faster-whisper test
|
|
$(MAKE) -C backend/python/whisperx test
|
|
$(MAKE) -C backend/python/ace-step test
|
|
$(MAKE) -C backend/python/trl test
|
|
$(MAKE) -C backend/python/tinygrad test
|
|
$(MAKE) -C backend/python/insightface test
|
|
$(MAKE) -C backend/python/speaker-recognition test
|
|
$(MAKE) -C backend/rust/kokoros test
|
|
$(MAKE) -C backend/go/rfdetr-cpp test
|
|
|
|
##
|
|
## End-to-end gRPC tests that exercise a built backend container image.
|
|
##
|
|
## The test suite in tests/e2e-backends is backend-agnostic. You drive it via env
|
|
## vars (see tests/e2e-backends/backend_test.go for the full list) and the
|
|
## capability-driven harness picks which gRPC RPCs to exercise:
|
|
##
|
|
## BACKEND_IMAGE Required. Docker image to test, e.g. local-ai-backend:llama-cpp.
|
|
## BACKEND_TEST_MODEL_URL URL of a model file to download and load.
|
|
## BACKEND_TEST_MODEL_FILE Path to an already-downloaded model (skips download).
|
|
## BACKEND_TEST_MODEL_NAME HuggingFace repo id (e.g. Qwen/Qwen2.5-0.5B-Instruct).
|
|
## Use this instead of MODEL_URL for backends that
|
|
## resolve HF model ids natively (vllm, vllm-omni).
|
|
## BACKEND_TEST_CAPS Comma-separated capabilities, default "health,load,predict,stream".
|
|
## Adds "tools" to exercise ChatDelta tool call extraction.
|
|
## BACKEND_TEST_PROMPT Override the prompt used in predict/stream specs.
|
|
## BACKEND_TEST_OPTIONS Comma-separated Options[] entries forwarded to LoadModel,
|
|
## e.g. "tool_parser:hermes,reasoning_parser:qwen3".
|
|
##
|
|
## Direct usage (image already built, no docker-build-* dependency):
|
|
##
|
|
## make test-extra-backend BACKEND_IMAGE=local-ai-backend:llama-cpp \
|
|
## BACKEND_TEST_MODEL_URL=https://.../model.gguf
|
|
##
|
|
## Convenience wrappers below build a specific backend image first, then run the
|
|
## suite against it.
|
|
##
|
|
BACKEND_TEST_MODEL_URL?=https://huggingface.co/Qwen/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q8_0.gguf
|
|
|
|
## Generic target — runs the suite against whatever BACKEND_IMAGE points at.
|
|
## Depends on protogen-go so pkg/grpc/proto is generated before `go test`.
|
|
test-extra-backend: protogen-go
|
|
@test -n "$$BACKEND_IMAGE" || { echo "BACKEND_IMAGE must be set" >&2; exit 1; }
|
|
BACKEND_IMAGE="$$BACKEND_IMAGE" \
|
|
BACKEND_TEST_MODEL_URL="$${BACKEND_TEST_MODEL_URL:-$(BACKEND_TEST_MODEL_URL)}" \
|
|
BACKEND_TEST_MODEL_FILE="$$BACKEND_TEST_MODEL_FILE" \
|
|
BACKEND_TEST_MODEL_NAME="$$BACKEND_TEST_MODEL_NAME" \
|
|
BACKEND_TEST_MMPROJ_URL="$$BACKEND_TEST_MMPROJ_URL" \
|
|
BACKEND_TEST_MMPROJ_FILE="$$BACKEND_TEST_MMPROJ_FILE" \
|
|
BACKEND_TEST_AUDIO_URL="$$BACKEND_TEST_AUDIO_URL" \
|
|
BACKEND_TEST_AUDIO_FILE="$$BACKEND_TEST_AUDIO_FILE" \
|
|
BACKEND_TEST_CAPS="$$BACKEND_TEST_CAPS" \
|
|
BACKEND_TEST_PROMPT="$$BACKEND_TEST_PROMPT" \
|
|
BACKEND_TEST_OPTIONS="$$BACKEND_TEST_OPTIONS" \
|
|
BACKEND_TEST_TOOL_PROMPT="$$BACKEND_TEST_TOOL_PROMPT" \
|
|
BACKEND_TEST_TOOL_NAME="$$BACKEND_TEST_TOOL_NAME" \
|
|
BACKEND_TEST_CACHE_TYPE_K="$$BACKEND_TEST_CACHE_TYPE_K" \
|
|
BACKEND_TEST_CACHE_TYPE_V="$$BACKEND_TEST_CACHE_TYPE_V" \
|
|
BACKEND_TEST_FACE_IMAGE_1_URL="$$BACKEND_TEST_FACE_IMAGE_1_URL" \
|
|
BACKEND_TEST_FACE_IMAGE_1_FILE="$$BACKEND_TEST_FACE_IMAGE_1_FILE" \
|
|
BACKEND_TEST_FACE_IMAGE_2_URL="$$BACKEND_TEST_FACE_IMAGE_2_URL" \
|
|
BACKEND_TEST_FACE_IMAGE_2_FILE="$$BACKEND_TEST_FACE_IMAGE_2_FILE" \
|
|
BACKEND_TEST_FACE_IMAGE_3_URL="$$BACKEND_TEST_FACE_IMAGE_3_URL" \
|
|
BACKEND_TEST_FACE_IMAGE_3_FILE="$$BACKEND_TEST_FACE_IMAGE_3_FILE" \
|
|
BACKEND_TEST_VERIFY_DISTANCE_CEILING="$$BACKEND_TEST_VERIFY_DISTANCE_CEILING" \
|
|
go test -v -timeout 30m ./tests/e2e-backends/...
|
|
|
|
## Convenience wrappers: build the image, then exercise it.
|
|
test-extra-backend-llama-cpp: docker-build-llama-cpp
|
|
BACKEND_IMAGE=local-ai-backend:llama-cpp \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,logprobs,logit_bias \
|
|
$(MAKE) test-extra-backend
|
|
|
|
test-extra-backend-ik-llama-cpp: docker-build-ik-llama-cpp
|
|
BACKEND_IMAGE=local-ai-backend:ik-llama-cpp $(MAKE) test-extra-backend
|
|
|
|
## turboquant: exercises the llama.cpp-fork backend with the fork's
|
|
## *TurboQuant-specific* KV-cache types (turbo3 for both K and V). turbo3
|
|
## is what makes this backend distinct from stock llama-cpp — picking q8_0
|
|
## here would only test the standard llama.cpp code path that the upstream
|
|
## llama-cpp backend already covers. The fork auto-enables flash_attention
|
|
## when turbo3/turbo4 are active, so we don't need to set it explicitly.
|
|
test-extra-backend-turboquant: docker-build-turboquant
|
|
BACKEND_IMAGE=local-ai-backend:turboquant \
|
|
BACKEND_TEST_CACHE_TYPE_K=q8_0 \
|
|
BACKEND_TEST_CACHE_TYPE_V=turbo3 \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## Audio transcription wrapper for the llama-cpp backend.
|
|
## Drives the new AudioTranscription / AudioTranscriptionStream RPCs against
|
|
## ggml-org/Qwen3-ASR-0.6B-GGUF (a small ASR model that requires its mmproj
|
|
## audio encoder companion). The audio fixture is a short public-domain
|
|
## "jfk.wav" clip ggml-org bundles with whisper.cpp's CI assets.
|
|
test-extra-backend-llama-cpp-transcription: docker-build-llama-cpp
|
|
BACKEND_IMAGE=local-ai-backend:llama-cpp \
|
|
BACKEND_TEST_MODEL_URL=https://huggingface.co/ggml-org/Qwen3-ASR-0.6B-GGUF/resolve/main/Qwen3-ASR-0.6B-Q8_0.gguf \
|
|
BACKEND_TEST_MMPROJ_URL=https://huggingface.co/ggml-org/Qwen3-ASR-0.6B-GGUF/resolve/main/mmproj-Qwen3-ASR-0.6B-Q8_0.gguf \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
BACKEND_TEST_CTX_SIZE=2048 \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## vllm is resolved from a HuggingFace model id (no file download) and
|
|
## exercises Predict + streaming + tool-call extraction via the hermes parser.
|
|
## Requires a host CPU with the SIMD instructions the prebuilt vllm CPU
|
|
## wheel was compiled against (AVX-512 VNNI/BF16); older CPUs will SIGILL
|
|
## on import — on CI this means using the bigger-runner label.
|
|
test-extra-backend-vllm: docker-build-vllm
|
|
BACKEND_IMAGE=local-ai-backend:vllm \
|
|
BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,tools \
|
|
BACKEND_TEST_OPTIONS=tool_parser:hermes \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## vllm multi-node data-parallel smoke test. Runs LocalAI head + a
|
|
## `local-ai p2p-worker vllm` follower in docker compose against
|
|
## Qwen2.5-0.5B with data_parallel_size=2. Requires 2 NVIDIA GPUs and
|
|
## nvidia-container-runtime on the host — vLLM v1's DP coordinator is
|
|
## not viable on CPU so this cannot run in CI without GPU.
|
|
test-extra-backend-vllm-multinode:
|
|
./tests/e2e/vllm-multinode/smoke.sh
|
|
|
|
## tinygrad mirrors the vllm target (same model, same caps, same parser) so
|
|
## the two backends are directly comparable. The LLM path covers Predict,
|
|
## streaming and native tool-call extraction. Companion targets below cover
|
|
## embeddings, Stable Diffusion and Whisper — run them individually or via
|
|
## the `test-extra-backend-tinygrad-all` aggregate.
|
|
test-extra-backend-tinygrad: docker-build-tinygrad
|
|
BACKEND_IMAGE=local-ai-backend:tinygrad \
|
|
BACKEND_TEST_MODEL_NAME=Qwen/Qwen3-0.6B \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,tools \
|
|
BACKEND_TEST_OPTIONS=tool_parser:hermes \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## tinygrad — embeddings via LLM last-hidden-state pooling. Reuses the same
|
|
## Qwen3-0.6B as the chat target so we don't need a separate BERT vendor;
|
|
## the Embedding RPC mean-pools and L2-normalizes the last-layer hidden
|
|
## state.
|
|
test-extra-backend-tinygrad-embeddings: docker-build-tinygrad
|
|
BACKEND_IMAGE=local-ai-backend:tinygrad \
|
|
BACKEND_TEST_MODEL_NAME=Qwen/Qwen3-0.6B \
|
|
BACKEND_TEST_CAPS=health,load,embeddings \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## tinygrad — Stable Diffusion 1.5. The original CompVis/runwayml repos have
|
|
## been gated, so we use the community-maintained mirror at
|
|
## stable-diffusion-v1-5/stable-diffusion-v1-5 with the EMA-only pruned
|
|
## checkpoint (~4.3GB). Step count is kept low (4) so a CPU-only run finishes
|
|
## in a few minutes; bump BACKEND_TEST_IMAGE_STEPS for higher quality.
|
|
test-extra-backend-tinygrad-sd: docker-build-tinygrad
|
|
BACKEND_IMAGE=local-ai-backend:tinygrad \
|
|
BACKEND_TEST_MODEL_NAME=stable-diffusion-v1-5/stable-diffusion-v1-5 \
|
|
BACKEND_TEST_CAPS=health,load,image \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## tinygrad — Whisper. Loads OpenAI's tiny.en checkpoint (smallest at ~75MB)
|
|
## from the original azure CDN through tinygrad's `fetch` helper, and
|
|
## transcribes the canonical jfk.wav fixture from whisper.cpp's CI samples.
|
|
## Exercises both AudioTranscription and AudioTranscriptionStream.
|
|
test-extra-backend-tinygrad-whisper: docker-build-tinygrad
|
|
BACKEND_IMAGE=local-ai-backend:tinygrad \
|
|
BACKEND_TEST_MODEL_NAME=openai/whisper-tiny.en \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
$(MAKE) test-extra-backend
|
|
|
|
test-extra-backend-tinygrad-all: \
|
|
test-extra-backend-tinygrad \
|
|
test-extra-backend-tinygrad-embeddings \
|
|
test-extra-backend-tinygrad-sd \
|
|
test-extra-backend-tinygrad-whisper
|
|
|
|
## insightface — face recognition.
|
|
##
|
|
## Face fixtures default to the sample images shipped in the
|
|
## deepinsight/insightface repository (MIT-licensed). For offline/local
|
|
## runs override with BACKEND_TEST_FACE_IMAGE_{1,2,3}_FILE pointing at
|
|
## local paths.
|
|
FACE_IMAGE_1_URL ?= https://github.com/deepinsight/insightface/raw/master/python-package/insightface/data/images/t1.jpg
|
|
FACE_IMAGE_2_URL ?= https://github.com/deepinsight/insightface/raw/master/python-package/insightface/data/images/t1.jpg
|
|
FACE_IMAGE_3_URL ?= https://github.com/deepinsight/insightface/raw/master/python-package/insightface/data/images/mask_white.jpg
|
|
## Known spoof fixture used by the face_antispoof e2e cap. This is
|
|
## upstream's own `image_F2.jpg` (Silent-Face repo, via yakhyo mirror)
|
|
## — verified to classify as is_real=false with score < 0.05 on the
|
|
## MiniFASNetV2 + MiniFASNetV1SE ensemble.
|
|
FACE_SPOOF_IMAGE_URL ?= https://github.com/yakhyo/face-anti-spoofing/raw/main/assets/image_F2.jpg
|
|
|
|
## Host-side cache for the OpenCV Zoo face ONNX files used by the
|
|
## opencv e2e target. The backend image no longer bakes model weights —
|
|
## gallery installs bring them via `files:` — but the e2e suite drives
|
|
## LoadModel over gRPC directly without going through the gallery. We
|
|
## pre-download the ONNX files to a stable host path and pass absolute
|
|
## paths in BACKEND_TEST_OPTIONS; `make` skips the downloads when the
|
|
## SHA-256 already matches.
|
|
INSIGHTFACE_OPENCV_DIR := /tmp/localai-insightface-opencv-cache
|
|
INSIGHTFACE_OPENCV_YUNET_URL := https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx
|
|
INSIGHTFACE_OPENCV_SFACE_URL := https://github.com/opencv/opencv_zoo/raw/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx
|
|
INSIGHTFACE_OPENCV_YUNET_SHA := 8f2383e4dd3cfbb4553ea8718107fc0423210dc964f9f4280604804ed2552fa4
|
|
INSIGHTFACE_OPENCV_SFACE_SHA := 0ba9fbfa01b5270c96627c4ef784da859931e02f04419c829e83484087c34e79
|
|
|
|
## buffalo_sc (insightface) — pack zip + SHA-256 mirrors the gallery
|
|
## entry so the e2e target matches exactly what `local-ai models install
|
|
## insightface-buffalo-sc` would have fetched. Smallest insightface pack
|
|
## (~16MB) — keeps CI fast while still covering the insightface engine
|
|
## code path end-to-end.
|
|
INSIGHTFACE_BUFFALO_SC_DIR := /tmp/localai-insightface-buffalo-sc-cache
|
|
INSIGHTFACE_BUFFALO_SC_URL := https://github.com/deepinsight/insightface/releases/download/v0.7/buffalo_sc.zip
|
|
INSIGHTFACE_BUFFALO_SC_SHA := 57d31b56b6ffa911c8a73cfc1707c73cab76efe7f13b675a05223bf42de47c72
|
|
|
|
## Silent-Face antispoofing (MiniFASNetV2 + MiniFASNetV1SE) — shared
|
|
## between the buffalo_sc and opencv e2e targets. Both ONNX files are
|
|
## ~1.7MB, Apache 2.0. URLs + SHAs mirror the gallery entries.
|
|
INSIGHTFACE_ANTISPOOF_DIR := /tmp/localai-insightface-antispoof-cache
|
|
INSIGHTFACE_ANTISPOOF_V2_URL := https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx
|
|
INSIGHTFACE_ANTISPOOF_V2_SHA := b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907
|
|
INSIGHTFACE_ANTISPOOF_V1SE_URL := https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx
|
|
INSIGHTFACE_ANTISPOOF_V1SE_SHA := ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676
|
|
|
|
.PHONY: insightface-opencv-models
|
|
insightface-opencv-models:
|
|
@mkdir -p $(INSIGHTFACE_OPENCV_DIR)
|
|
@if [ "$$(sha256sum $(INSIGHTFACE_OPENCV_DIR)/yunet.onnx 2>/dev/null | awk '{print $$1}')" != "$(INSIGHTFACE_OPENCV_YUNET_SHA)" ]; then \
|
|
echo "Fetching YuNet..."; \
|
|
curl -fsSL -o $(INSIGHTFACE_OPENCV_DIR)/yunet.onnx $(INSIGHTFACE_OPENCV_YUNET_URL); \
|
|
echo "$(INSIGHTFACE_OPENCV_YUNET_SHA) $(INSIGHTFACE_OPENCV_DIR)/yunet.onnx" | sha256sum -c; \
|
|
fi
|
|
@if [ "$$(sha256sum $(INSIGHTFACE_OPENCV_DIR)/sface.onnx 2>/dev/null | awk '{print $$1}')" != "$(INSIGHTFACE_OPENCV_SFACE_SHA)" ]; then \
|
|
echo "Fetching SFace..."; \
|
|
curl -fsSL -o $(INSIGHTFACE_OPENCV_DIR)/sface.onnx $(INSIGHTFACE_OPENCV_SFACE_URL); \
|
|
echo "$(INSIGHTFACE_OPENCV_SFACE_SHA) $(INSIGHTFACE_OPENCV_DIR)/sface.onnx" | sha256sum -c; \
|
|
fi
|
|
|
|
.PHONY: insightface-antispoof-models
|
|
insightface-antispoof-models:
|
|
@mkdir -p $(INSIGHTFACE_ANTISPOOF_DIR)
|
|
@if [ "$$(sha256sum $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV2.onnx 2>/dev/null | awk '{print $$1}')" != "$(INSIGHTFACE_ANTISPOOF_V2_SHA)" ]; then \
|
|
echo "Fetching MiniFASNetV2..."; \
|
|
curl -fsSL -o $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV2.onnx $(INSIGHTFACE_ANTISPOOF_V2_URL); \
|
|
echo "$(INSIGHTFACE_ANTISPOOF_V2_SHA) $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV2.onnx" | sha256sum -c; \
|
|
fi
|
|
@if [ "$$(sha256sum $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV1SE.onnx 2>/dev/null | awk '{print $$1}')" != "$(INSIGHTFACE_ANTISPOOF_V1SE_SHA)" ]; then \
|
|
echo "Fetching MiniFASNetV1SE..."; \
|
|
curl -fsSL -o $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV1SE.onnx $(INSIGHTFACE_ANTISPOOF_V1SE_URL); \
|
|
echo "$(INSIGHTFACE_ANTISPOOF_V1SE_SHA) $(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV1SE.onnx" | sha256sum -c; \
|
|
fi
|
|
|
|
.PHONY: insightface-buffalo-sc-models
|
|
insightface-buffalo-sc-models:
|
|
@mkdir -p $(INSIGHTFACE_BUFFALO_SC_DIR)
|
|
@if [ "$$(sha256sum $(INSIGHTFACE_BUFFALO_SC_DIR)/buffalo_sc.zip 2>/dev/null | awk '{print $$1}')" != "$(INSIGHTFACE_BUFFALO_SC_SHA)" ]; then \
|
|
echo "Fetching buffalo_sc..."; \
|
|
curl -fsSL -o $(INSIGHTFACE_BUFFALO_SC_DIR)/buffalo_sc.zip $(INSIGHTFACE_BUFFALO_SC_URL); \
|
|
echo "$(INSIGHTFACE_BUFFALO_SC_SHA) $(INSIGHTFACE_BUFFALO_SC_DIR)/buffalo_sc.zip" | sha256sum -c; \
|
|
rm -f $(INSIGHTFACE_BUFFALO_SC_DIR)/*.onnx; \
|
|
fi
|
|
@if [ ! -f "$(INSIGHTFACE_BUFFALO_SC_DIR)/det_500m.onnx" ]; then \
|
|
echo "Extracting buffalo_sc..."; \
|
|
unzip -o -q $(INSIGHTFACE_BUFFALO_SC_DIR)/buffalo_sc.zip -d $(INSIGHTFACE_BUFFALO_SC_DIR); \
|
|
fi
|
|
|
|
## buffalo_sc — smallest insightface pack (SCRFD-500MF detector + MBF
|
|
## recognizer, ~16MB). Exercises the insightface engine code path
|
|
## (model_zoo-backed inference) without the ~326MB buffalo_l download.
|
|
## No age/gender/landmark heads — face_analyze is dropped from caps.
|
|
## The pack is pre-fetched on the host and passed as `root:<dir>` since
|
|
## the e2e suite drives LoadModel directly without going through
|
|
## LocalAI's gallery flow (which is what would normally populate
|
|
## ModelPath and in turn the engine's `_model_dir` option).
|
|
test-extra-backend-insightface-buffalo-sc: docker-build-insightface insightface-buffalo-sc-models insightface-antispoof-models
|
|
BACKEND_IMAGE=local-ai-backend:insightface \
|
|
BACKEND_TEST_MODEL_NAME=insightface-buffalo-sc \
|
|
BACKEND_TEST_OPTIONS=engine:insightface,model_pack:buffalo_sc,root:$(INSIGHTFACE_BUFFALO_SC_DIR),antispoof_v2_onnx:$(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV2.onnx,antispoof_v1se_onnx:$(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV1SE.onnx \
|
|
BACKEND_TEST_CAPS=health,load,face_detect,face_embed,face_verify,face_antispoof \
|
|
BACKEND_TEST_FACE_IMAGE_1_URL=$(FACE_IMAGE_1_URL) \
|
|
BACKEND_TEST_FACE_IMAGE_2_URL=$(FACE_IMAGE_2_URL) \
|
|
BACKEND_TEST_FACE_IMAGE_3_URL=$(FACE_IMAGE_3_URL) \
|
|
BACKEND_TEST_FACE_SPOOF_IMAGE_URL=$(FACE_SPOOF_IMAGE_URL) \
|
|
BACKEND_TEST_VERIFY_DISTANCE_CEILING=0.55 \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## OpenCV Zoo YuNet + SFace — Apache 2.0, commercial-safe. face_analyze
|
|
## cap is dropped (SFace has no demographic head). The ONNX files are
|
|
## pre-fetched on the host via the insightface-opencv-models target and
|
|
## passed as absolute paths, since the e2e suite drives LoadModel
|
|
## directly without going through LocalAI's gallery flow.
|
|
test-extra-backend-insightface-opencv: docker-build-insightface insightface-opencv-models insightface-antispoof-models
|
|
BACKEND_IMAGE=local-ai-backend:insightface \
|
|
BACKEND_TEST_MODEL_NAME=insightface-opencv \
|
|
BACKEND_TEST_OPTIONS=engine:onnx_direct,detector_onnx:$(INSIGHTFACE_OPENCV_DIR)/yunet.onnx,recognizer_onnx:$(INSIGHTFACE_OPENCV_DIR)/sface.onnx,antispoof_v2_onnx:$(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV2.onnx,antispoof_v1se_onnx:$(INSIGHTFACE_ANTISPOOF_DIR)/MiniFASNetV1SE.onnx \
|
|
BACKEND_TEST_CAPS=health,load,face_detect,face_embed,face_verify,face_antispoof \
|
|
BACKEND_TEST_FACE_IMAGE_1_URL=$(FACE_IMAGE_1_URL) \
|
|
BACKEND_TEST_FACE_IMAGE_2_URL=$(FACE_IMAGE_2_URL) \
|
|
BACKEND_TEST_FACE_IMAGE_3_URL=$(FACE_IMAGE_3_URL) \
|
|
BACKEND_TEST_FACE_SPOOF_IMAGE_URL=$(FACE_SPOOF_IMAGE_URL) \
|
|
BACKEND_TEST_VERIFY_DISTANCE_CEILING=0.55 \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## Aggregate — runs both face-recognition model configurations so CI
|
|
## catches regressions across engines together.
|
|
test-extra-backend-insightface-all: \
|
|
test-extra-backend-insightface-buffalo-sc \
|
|
test-extra-backend-insightface-opencv
|
|
|
|
## speaker-recognition — voice (speaker) biometrics.
|
|
##
|
|
## Audio fixtures default to the speechbrain test samples served
|
|
## straight from their GitHub repo — public, no auth needed, and they
|
|
## ship as 16kHz mono WAV/FLAC which is exactly what the engine wants.
|
|
## example{1,2,5} are three different speakers; the suite treats
|
|
## example1 as the "same-image twin" probe (verify(clip, clip) must
|
|
## return distance≈0) and the other two as cross-speaker ceilings.
|
|
## Override with BACKEND_TEST_VOICE_AUDIO_{1,2,3}_FILE for offline runs.
|
|
VOICE_AUDIO_1_URL ?= https://github.com/speechbrain/speechbrain/raw/develop/tests/samples/single-mic/example1.wav
|
|
VOICE_AUDIO_2_URL ?= https://github.com/speechbrain/speechbrain/raw/develop/tests/samples/single-mic/example2.flac
|
|
VOICE_AUDIO_3_URL ?= https://github.com/speechbrain/speechbrain/raw/develop/tests/samples/single-mic/example5.wav
|
|
|
|
## ECAPA-TDNN via SpeechBrain — default CI configuration. Auto-downloads
|
|
## the checkpoint from HuggingFace on first LoadModel (bundled in the
|
|
## backend image pip install). 192-d embeddings, cosine-distance based.
|
|
## The e2e suite drives LoadModel directly so we don't rely on LocalAI's
|
|
## gallery flow here.
|
|
test-extra-backend-speaker-recognition-ecapa: docker-build-speaker-recognition
|
|
BACKEND_IMAGE=local-ai-backend:speaker-recognition \
|
|
BACKEND_TEST_MODEL_NAME=speechbrain/spkrec-ecapa-voxceleb \
|
|
BACKEND_TEST_OPTIONS=engine:speechbrain,source:speechbrain/spkrec-ecapa-voxceleb \
|
|
BACKEND_TEST_CAPS=health,load,voice_embed,voice_verify \
|
|
BACKEND_TEST_VOICE_AUDIO_1_URL=$(VOICE_AUDIO_1_URL) \
|
|
BACKEND_TEST_VOICE_AUDIO_2_URL=$(VOICE_AUDIO_2_URL) \
|
|
BACKEND_TEST_VOICE_AUDIO_3_URL=$(VOICE_AUDIO_3_URL) \
|
|
BACKEND_TEST_VOICE_VERIFY_DISTANCE_CEILING=0.4 \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## Aggregate — today there's only one voice config; the target exists
|
|
## so the CI workflow matches the insightface-all naming convention and
|
|
## can grow to include WeSpeaker / 3D-Speaker later.
|
|
test-extra-backend-speaker-recognition-all: \
|
|
test-extra-backend-speaker-recognition-ecapa
|
|
|
|
## Realtime e2e with sherpa-onnx driving VAD + STT + TTS against a mocked
|
|
## LLM. Extracts the sherpa-onnx Docker image rootfs, downloads the three
|
|
## gallery-referenced model bundles (silero-vad, omnilingual-asr, vits-ljs),
|
|
## writes the corresponding model config YAMLs, and runs the realtime
|
|
## websocket spec in tests/e2e with REALTIME_* env vars wiring the sherpa
|
|
## slots into the pipeline. The LLM slot stays on the in-repo mock-backend
|
|
## registered unconditionally by tests/e2e/e2e_suite_test.go. See
|
|
## tests/e2e/run-realtime-sherpa.sh for the full orchestration.
|
|
test-extra-e2e-realtime-sherpa: build-mock-backend docker-build-sherpa-onnx protogen-go react-ui
|
|
bash tests/e2e/run-realtime-sherpa.sh
|
|
|
|
## Streaming ASR via the sherpa-onnx online recognizer. Uses the streaming
|
|
## zipformer English model (encoder/decoder/joiner int8 + tokens) from the
|
|
## sherpa-onnx gallery entry. Drives both AudioTranscription and
|
|
## AudioTranscriptionStream via the e2e-backends gRPC harness; streaming
|
|
## emits real partial deltas during decode. Each file is renamed on download
|
|
## to the shape sherpa-onnx's online loader expects (encoder.int8.onnx etc.).
|
|
test-extra-backend-sherpa-onnx-transcription: docker-build-sherpa-onnx
|
|
BACKEND_IMAGE=local-ai-backend:sherpa-onnx \
|
|
BACKEND_TEST_MODEL_URL='https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx#encoder.int8.onnx' \
|
|
BACKEND_TEST_EXTRA_FILES='https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx#decoder.int8.onnx|https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx#joiner.int8.onnx|https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/tokens.txt' \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
BACKEND_TEST_OPTIONS=subtype=online \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## VITS TTS via the sherpa-onnx backend. Pulls the individual files from
|
|
## HuggingFace (the vits-ljs release tarball lives on the k2-fsa github
|
|
## but is also mirrored as discrete files on HF). Exercises both
|
|
## TTS (write-to-file) and TTSStream (PCM chunks + WAV header) via the
|
|
## e2e-backends gRPC harness.
|
|
test-extra-backend-sherpa-onnx-tts: docker-build-sherpa-onnx
|
|
BACKEND_IMAGE=local-ai-backend:sherpa-onnx \
|
|
BACKEND_TEST_MODEL_URL='https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx#vits-ljs.onnx' \
|
|
BACKEND_TEST_EXTRA_FILES='https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt|https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt' \
|
|
BACKEND_TEST_CAPS=health,load,tts \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## VibeVoice TTS via the vibevoice-cpp backend. ModelFile is the
|
|
## realtime gguf; the supplementary tokenizer + voice prompt land
|
|
## alongside it under the harness's models dir and are wired through
|
|
## via the standard Options[] convention (tokenizer=, voice=).
|
|
test-extra-backend-vibevoice-cpp-tts: docker-build-vibevoice-cpp
|
|
BACKEND_IMAGE=local-ai-backend:vibevoice-cpp \
|
|
BACKEND_TEST_MODEL_URL='https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/vibevoice-realtime-0.5B-q8_0.gguf#vibevoice-realtime-0.5B-q8_0.gguf' \
|
|
BACKEND_TEST_EXTRA_FILES='https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/tokenizer.gguf#tokenizer.gguf|https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/voice-en-Carter_man.gguf#voice-en-Carter_man.gguf' \
|
|
BACKEND_TEST_OPTIONS=tokenizer:tokenizer.gguf,voice:voice-en-Carter_man.gguf \
|
|
BACKEND_TEST_CAPS=health,load,tts \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## VibeVoice ASR (long-form, with diarization). type=asr tells the
|
|
## backend's Load() to slot ModelFile into the asr_model role; the
|
|
## tokenizer is supplied via Options[]. Uses the Q4_K quant (~10 GB)
|
|
## rather than Q8_0 (~14 GB) so the bundle fits inside ubuntu-latest's
|
|
## post-image disk budget.
|
|
test-extra-backend-vibevoice-cpp-transcription: docker-build-vibevoice-cpp
|
|
BACKEND_IMAGE=local-ai-backend:vibevoice-cpp \
|
|
BACKEND_TEST_MODEL_URL='https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/vibevoice-asr-q4_k.gguf#vibevoice-asr-q4_k.gguf' \
|
|
BACKEND_TEST_EXTRA_FILES='https://huggingface.co/mudler/vibevoice.cpp-models/resolve/main/tokenizer.gguf#tokenizer.gguf' \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_OPTIONS=type:asr,tokenizer:tokenizer.gguf \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## Audio transcription wrapper for the whisper.cpp backend.
|
|
## Drives the AudioTranscription / AudioTranscriptionStream RPCs against
|
|
## ggml-base.en (~145 MB) using the JFK 11s clip. The streaming spec
|
|
## asserts len(deltas) >= 1 and concat(deltas) == final.Text - whisper-
|
|
## specific multi-segment assertions live in backend/go/whisper/gowhisper_test.go.
|
|
test-extra-backend-whisper-transcription: docker-build-whisper
|
|
BACKEND_IMAGE=local-ai-backend:whisper \
|
|
BACKEND_TEST_MODEL_URL=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## Audio transcription wrapper for the parakeet-cpp (parakeet.cpp ggml port)
|
|
## backend. Mirrors test-extra-backend-whisper-transcription: drives the
|
|
## AudioTranscription / AudioTranscriptionStream RPCs against a published
|
|
## Parakeet GGUF using the JFK 11s clip from whisper.cpp's CI samples. Not
|
|
## part of the default test suite - run explicitly once the pinned model URL
|
|
## is reachable.
|
|
test-extra-backend-parakeet-cpp-transcription: docker-build-parakeet-cpp
|
|
BACKEND_IMAGE=local-ai-backend:parakeet-cpp \
|
|
BACKEND_TEST_MODEL_URL=https://huggingface.co/mudler/parakeet-cpp-gguf/resolve/main/tdt_ctc-110m-f16.gguf \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,transcription \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## LocalVQE audio transform (joint AEC + noise suppression + dereverb).
|
|
## Exercises the audio_transform capability end-to-end: batch transform
|
|
## of a real WAV fixture and bidi streaming of synthetic silent frames.
|
|
test-extra-backend-localvqe-transform: docker-build-localvqe
|
|
BACKEND_IMAGE=local-ai-backend:localvqe \
|
|
BACKEND_TEST_MODEL_URL='https://huggingface.co/LocalAI-io/LocalVQE/resolve/main/localvqe-v1-1.3M-f32.gguf#localvqe-v1-1.3M-f32.gguf' \
|
|
BACKEND_TEST_AUDIO_URL=https://github.com/ggml-org/whisper.cpp/raw/master/samples/jfk.wav \
|
|
BACKEND_TEST_CAPS=health,load,audio_transform \
|
|
$(MAKE) test-extra-backend
|
|
|
|
## sglang mirrors the vllm setup: HuggingFace model id, same tiny Qwen,
|
|
## tool-call extraction via sglang's native qwen parser. CPU builds use
|
|
## sglang's upstream pyproject_cpu.toml recipe (see backend/python/sglang/install.sh).
|
|
test-extra-backend-sglang: docker-build-sglang
|
|
BACKEND_IMAGE=local-ai-backend:sglang \
|
|
BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,tools \
|
|
BACKEND_TEST_OPTIONS=tool_parser:qwen \
|
|
$(MAKE) test-extra-backend
|
|
|
|
|
|
## mlx is Apple-Silicon-first — the MLX backend auto-detects the right tool
|
|
## parser from the chat template, so no tool_parser: option is needed (it
|
|
## would be ignored at runtime). Run this on macOS / arm64 with Metal; the
|
|
## Linux/CPU mlx variant is untested in CI.
|
|
test-extra-backend-mlx: docker-build-mlx
|
|
BACKEND_IMAGE=local-ai-backend:mlx \
|
|
BACKEND_TEST_MODEL_NAME=mlx-community/Qwen2.5-0.5B-Instruct-4bit \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,tools \
|
|
$(MAKE) test-extra-backend
|
|
|
|
test-extra-backend-mlx-vlm: docker-build-mlx-vlm
|
|
BACKEND_IMAGE=local-ai-backend:mlx-vlm \
|
|
BACKEND_TEST_MODEL_NAME=mlx-community/Qwen2.5-0.5B-Instruct-4bit \
|
|
BACKEND_TEST_CAPS=health,load,predict,stream,tools \
|
|
$(MAKE) test-extra-backend
|
|
|
|
DOCKER_IMAGE?=local-ai
|
|
IMAGE_TYPE?=core
|
|
BASE_IMAGE?=ubuntu:24.04
|
|
|
|
docker:
|
|
docker build \
|
|
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
|
--build-arg GO_TAGS="$(GO_TAGS)" \
|
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
|
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
|
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
|
|
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
-t $(DOCKER_IMAGE) .
|
|
|
|
docker-cuda12:
|
|
docker build \
|
|
--build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
|
|
--build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
|
|
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
|
--build-arg GO_TAGS="$(GO_TAGS)" \
|
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
|
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
-t $(DOCKER_IMAGE)-cuda-12 .
|
|
|
|
docker-image-intel:
|
|
docker build \
|
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.2-0-devel-ubuntu24.04 \
|
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
|
--build-arg GO_TAGS="$(GO_TAGS)" \
|
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
|
--build-arg BUILD_TYPE=intel \
|
|
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
|
|
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
-t $(DOCKER_IMAGE) .
|
|
|
|
########################################################
|
|
## Backends
|
|
########################################################
|
|
|
|
# Pattern rule for standard backends (docker-based)
|
|
# This matches all backends that use docker-build-* and docker-save-*
|
|
backends/%: docker-build-% docker-save-% build
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/$*.tar)"
|
|
|
|
# Darwin-specific backends (keep as explicit targets since they have special build logic)
|
|
backends/llama-cpp-darwin: build
|
|
bash ./scripts/build/llama-cpp-darwin.sh
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
|
|
|
|
backends/ds4-darwin: build
|
|
bash ./scripts/build/ds4-darwin.sh
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/ds4.tar)"
|
|
|
|
build-darwin-python-backend: build
|
|
bash ./scripts/build/python-darwin.sh
|
|
|
|
build-darwin-go-backend: build
|
|
bash ./scripts/build/golang-darwin.sh
|
|
|
|
backends/mlx:
|
|
BACKEND=mlx $(MAKE) build-darwin-python-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx.tar)"
|
|
|
|
backends/diffuser-darwin:
|
|
BACKEND=diffusers $(MAKE) build-darwin-python-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
|
|
|
|
backends/mlx-vlm:
|
|
BACKEND=mlx-vlm $(MAKE) build-darwin-python-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-vlm.tar)"
|
|
|
|
backends/mlx-audio:
|
|
BACKEND=mlx-audio $(MAKE) build-darwin-python-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-audio.tar)"
|
|
|
|
backends/mlx-distributed:
|
|
BACKEND=mlx-distributed $(MAKE) build-darwin-python-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/mlx-distributed.tar)"
|
|
|
|
backends/stablediffusion-ggml-darwin:
|
|
BACKEND=stablediffusion-ggml BUILD_TYPE=metal $(MAKE) build-darwin-go-backend
|
|
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
|
|
|
|
backend-images:
|
|
mkdir -p backend-images
|
|
|
|
# Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
|
|
# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
|
|
BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
|
|
# ik-llama-cpp is a fork of llama.cpp with superior CPU performance
|
|
BACKEND_IK_LLAMA_CPP = ik-llama-cpp|ik-llama-cpp|.|false|false
|
|
# turboquant is a llama.cpp fork with TurboQuant KV-cache quantization.
|
|
# Reuses backend/cpp/llama-cpp grpc-server sources via a thin wrapper Makefile.
|
|
BACKEND_TURBOQUANT = turboquant|turboquant|.|false|false
|
|
# ds4 is antirez/ds4, a DeepSeek V4 Flash-specific inference engine.
|
|
# Single-model; hardware-only validation lives at tests/e2e-backends/
|
|
# (BACKEND_BINARY mode); see docs/superpowers/plans/2026-05-11-ds4-backend.md.
|
|
BACKEND_DS4 = ds4|ds4|.|false|false
|
|
|
|
# Golang backends
|
|
BACKEND_PIPER = piper|golang|.|false|true
|
|
BACKEND_LOCAL_STORE = local-store|golang|.|false|true
|
|
BACKEND_CLOUD_PROXY = cloud-proxy|golang|.|false|true
|
|
BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
|
|
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
|
|
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
|
|
BACKEND_WHISPER = whisper|golang|.|false|true
|
|
BACKEND_CRISPASR = crispasr|golang|.|false|true
|
|
BACKEND_PARAKEET_CPP = parakeet-cpp|golang|.|false|true
|
|
BACKEND_VOXTRAL = voxtral|golang|.|false|true
|
|
BACKEND_ACESTEP_CPP = acestep-cpp|golang|.|false|true
|
|
BACKEND_QWEN3_TTS_CPP = qwen3-tts-cpp|golang|.|false|true
|
|
BACKEND_VIBEVOICE_CPP = vibevoice-cpp|golang|.|false|true
|
|
BACKEND_LOCALVQE = localvqe|golang|.|false|true
|
|
BACKEND_OPUS = opus|golang|.|false|true
|
|
BACKEND_SHERPA_ONNX = sherpa-onnx|golang|.|false|true
|
|
|
|
# Python backends with root context
|
|
BACKEND_RERANKERS = rerankers|python|.|false|true
|
|
BACKEND_TRANSFORMERS = transformers|python|.|false|true
|
|
BACKEND_OUTETTS = outetts|python|.|false|true
|
|
BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
|
|
BACKEND_COQUI = coqui|python|.|false|true
|
|
BACKEND_RFDETR = rfdetr|python|.|false|true
|
|
BACKEND_INSIGHTFACE = insightface|python|.|false|true
|
|
BACKEND_SPEAKER_RECOGNITION = speaker-recognition|python|.|false|true
|
|
BACKEND_KITTEN_TTS = kitten-tts|python|.|false|true
|
|
BACKEND_NEUTTS = neutts|python|.|false|true
|
|
BACKEND_KOKORO = kokoro|python|.|false|true
|
|
BACKEND_VLLM = vllm|python|.|false|true
|
|
BACKEND_VLLM_OMNI = vllm-omni|python|.|false|true
|
|
BACKEND_SGLANG = sglang|python|.|false|true
|
|
BACKEND_DIFFUSERS = diffusers|python|.|--progress=plain|true
|
|
BACKEND_CHATTERBOX = chatterbox|python|.|false|true
|
|
BACKEND_VIBEVOICE = vibevoice|python|.|--progress=plain|true
|
|
BACKEND_LIQUID_AUDIO = liquid-audio|python|.|--progress=plain|true
|
|
BACKEND_MOONSHINE = moonshine|python|.|false|true
|
|
BACKEND_POCKET_TTS = pocket-tts|python|.|false|true
|
|
BACKEND_QWEN_TTS = qwen-tts|python|.|false|true
|
|
BACKEND_FISH_SPEECH = fish-speech|python|.|false|true
|
|
BACKEND_FASTER_QWEN3_TTS = faster-qwen3-tts|python|.|false|true
|
|
BACKEND_QWEN_ASR = qwen-asr|python|.|false|true
|
|
BACKEND_NEMO = nemo|python|.|false|true
|
|
BACKEND_VOXCPM = voxcpm|python|.|false|true
|
|
BACKEND_WHISPERX = whisperx|python|.|false|true
|
|
BACKEND_ACE_STEP = ace-step|python|.|false|true
|
|
BACKEND_MLX = mlx|python|.|false|true
|
|
BACKEND_MLX_VLM = mlx-vlm|python|.|false|true
|
|
BACKEND_MLX_DISTRIBUTED = mlx-distributed|python|./|false|true
|
|
BACKEND_TRL = trl|python|.|false|true
|
|
BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true
|
|
BACKEND_TINYGRAD = tinygrad|python|.|false|true
|
|
|
|
# Rust backends
|
|
BACKEND_KOKOROS = kokoros|rust|.|false|true
|
|
|
|
# C++ backends (Go wrapper with purego)
|
|
BACKEND_SAM3_CPP = sam3-cpp|golang|.|false|true
|
|
BACKEND_RFDETR_CPP = rfdetr-cpp|golang|.|false|true
|
|
|
|
# Helper function to build docker image for a backend
|
|
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
|
|
define docker-build-backend
|
|
docker build $(if $(filter-out false,$(4)),$(4)) \
|
|
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
|
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
|
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
|
|
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
|
|
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
|
|
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
|
|
--build-arg APT_MIRROR=$(APT_MIRROR) \
|
|
--build-arg APT_PORTS_MIRROR=$(APT_PORTS_MIRROR) \
|
|
$(if $(FROM_SOURCE),--build-arg FROM_SOURCE=$(FROM_SOURCE)) \
|
|
$(if $(AMDGPU_TARGETS),--build-arg AMDGPU_TARGETS=$(AMDGPU_TARGETS)) \
|
|
$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
|
|
-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
|
|
endef
|
|
|
|
# Generate docker-build targets from backend definitions
|
|
define generate-docker-build-target
|
|
docker-build-$(word 1,$(subst |, ,$(1))):
|
|
$$(call docker-build-backend,$(word 1,$(subst |, ,$(1))),$(word 2,$(subst |, ,$(1))),$(word 3,$(subst |, ,$(1))),$(word 4,$(subst |, ,$(1))),$(word 5,$(subst |, ,$(1))))
|
|
endef
|
|
|
|
# Generate all docker-build targets
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_IK_LLAMA_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_TURBOQUANT)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_DS4)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_CLOUD_PROXY)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_CRISPASR)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_PARAKEET_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VOXTRAL)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_OPUS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_OUTETTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_INSIGHTFACE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_SPEAKER_RECOGNITION)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_KOKORO)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VLLM)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VLLM_OMNI)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_SGLANG)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_LIQUID_AUDIO)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_MOONSHINE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_FISH_SPEECH)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_QWEN3_TTS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN_ASR)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_NEMO)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_ACE_STEP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_QWEN3_TTS_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_LOCALVQE)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_MLX)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_MLX_VLM)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_TRL)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_TINYGRAD)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR_CPP)))
|
|
$(eval $(call generate-docker-build-target,$(BACKEND_SHERPA_ONNX)))
|
|
|
|
# Pattern rule for docker-save targets
|
|
docker-save-%: backend-images
|
|
docker save local-ai-backend:$* -o backend-images/$*.tar
|
|
|
|
docker-build-backends: docker-build-llama-cpp docker-build-ik-llama-cpp docker-build-turboquant docker-build-ds4 docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-sglang docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-crispasr docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-liquid-audio docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-tinygrad docker-build-kokoros docker-build-sam3-cpp docker-build-rfdetr-cpp docker-build-qwen3-tts-cpp docker-build-vibevoice-cpp docker-build-localvqe docker-build-insightface docker-build-speaker-recognition docker-build-sherpa-onnx docker-build-cloud-proxy
|
|
|
|
########################################################
|
|
### Mock Backend for E2E Tests
|
|
########################################################
|
|
|
|
build-mock-backend: protogen-go
|
|
$(GOCMD) build -o tests/e2e/mock-backend/mock-backend ./tests/e2e/mock-backend
|
|
|
|
clean-mock-backend:
|
|
rm -f tests/e2e/mock-backend/mock-backend
|
|
|
|
build-cloud-proxy-backend: protogen-go
|
|
$(GOCMD) build -o tests/e2e/mock-backend/cloud-proxy ./backend/go/cloud-proxy
|
|
|
|
clean-cloud-proxy-backend:
|
|
rm -f tests/e2e/mock-backend/cloud-proxy
|
|
|
|
########################################################
|
|
### UI E2E Test Server
|
|
########################################################
|
|
|
|
build-ui-test-server: build-mock-backend react-ui protogen-go
|
|
$(GOCMD) build -o tests/e2e-ui/ui-test-server ./tests/e2e-ui
|
|
|
|
test-ui-e2e: build-ui-test-server
|
|
cd core/http/react-ui && npm install && npx playwright install --with-deps chromium && npx playwright test
|
|
|
|
## Optional Playwright worker count for the UI e2e targets below. Pass
|
|
## UI_TEST_WORKERS=N (e.g. `make test-ui-coverage UI_TEST_WORKERS=20`) to
|
|
## override Playwright's default (cores/2). Empty by default so Playwright
|
|
## picks its own worker count.
|
|
UI_TEST_WORKERS ?=
|
|
PLAYWRIGHT_WORKERS_FLAG = $(if $(UI_TEST_WORKERS),--workers=$(UI_TEST_WORKERS),)
|
|
|
|
## Fast Playwright e2e run used by the pre-commit hook on React UI changes.
|
|
## Force-rebuilds the (non-instrumented) dist so the suite tests the working
|
|
## tree — not a stale dist the `react-ui` skip-guard would leave — re-embeds
|
|
## it into ui-test-server, and runs the specs. Uses the nix-provided browser
|
|
## when PLAYWRIGHT_CHROMIUM_PATH is set (flake dev shell), else falls back to
|
|
## downloading it as `test-ui-e2e` does.
|
|
test-ui: build-mock-backend protogen-go
|
|
cd core/http/react-ui && bun install && bun run build
|
|
$(GOCMD) build -o tests/e2e-ui/ui-test-server ./tests/e2e-ui
|
|
cd core/http/react-ui && sh $(CURDIR)/scripts/ensure-playwright-browser.sh && bunx playwright test $(PLAYWRIGHT_WORKERS_FLAG)
|
|
|
|
## React UI code coverage from the Playwright e2e suite. Builds a
|
|
## NON-instrumented bundle with source maps (COVERAGE_V8=true), re-embeds it
|
|
## into the ui-test-server (the dist is //go:embed'ed at compile time), runs the
|
|
## Playwright specs which collect native Chromium V8 coverage (PW_V8_COVERAGE=1)
|
|
## — far cheaper than istanbul's build-time counters (~40% faster end-to-end) —
|
|
## convert it to istanbul via v8-to-istanbul in the coverage fixture, and write
|
|
## an nyc report to core/http/react-ui/coverage/. Removes the dist afterwards so
|
|
## normal builds aren't served source-mapped assets. (The legacy istanbul path
|
|
## still exists: `bun run build:coverage` + unset PW_V8_COVERAGE.)
|
|
test-ui-coverage: build-mock-backend protogen-go
|
|
trap 'rm -rf "$(CURDIR)/core/http/react-ui/dist"' EXIT; \
|
|
( cd core/http/react-ui && bun install && bun run build:coverage-v8 ) && \
|
|
$(GOCMD) build -o tests/e2e-ui/ui-test-server ./tests/e2e-ui && \
|
|
( cd core/http/react-ui && rm -rf .nyc_output coverage && \
|
|
sh $(CURDIR)/scripts/ensure-playwright-browser.sh && \
|
|
PW_V8_COVERAGE=1 bunx playwright test $(PLAYWRIGHT_WORKERS_FLAG) && bun run coverage:report )
|
|
|
|
## UI coverage baseline (committed) and the strict gate that compares against
|
|
## it — the React mirror of test-coverage-baseline / test-coverage-check.
|
|
test-ui-coverage-baseline: test-ui-coverage
|
|
@node -e 'const fs=require("fs");process.stdout.write(String(JSON.parse(fs.readFileSync("core/http/react-ui/coverage/coverage-summary.json")).total.lines.pct))' > core/http/react-ui/coverage-baseline.txt
|
|
@echo "Saved UI coverage baseline: $$(cat core/http/react-ui/coverage-baseline.txt)% lines"
|
|
|
|
test-ui-coverage-check: test-ui-coverage
|
|
sh $(CURDIR)/scripts/ui-coverage-check.sh core/http/react-ui/coverage/coverage-summary.json core/http/react-ui/coverage-baseline.txt
|
|
|
|
test-ui-e2e-docker:
|
|
docker build -t localai-ui-e2e -f tests/e2e-ui/Dockerfile .
|
|
docker run --rm localai-ui-e2e
|
|
|
|
clean-ui-test-server:
|
|
rm -f tests/e2e-ui/ui-test-server
|
|
|
|
########################################################
|
|
### END Backends
|
|
########################################################
|
|
|
|
.PHONY: swagger
|
|
swagger:
|
|
swag init -g core/http/app.go --output swagger
|
|
|
|
# DEPRECATED: gen-assets is for the legacy Alpine.js UI. Remove when legacy UI is removed.
|
|
.PHONY: gen-assets
|
|
gen-assets:
|
|
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
|
|
|
## Documentation
|
|
docs/layouts/_default:
|
|
mkdir -p docs/layouts/_default
|
|
|
|
docs/static/gallery.html: docs/layouts/_default
|
|
$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html
|
|
|
|
docs/public: docs/layouts/_default docs/static/gallery.html
|
|
cd docs && hugo --minify
|
|
|
|
docs-clean:
|
|
rm -rf docs/public
|
|
rm -rf docs/static/gallery.html
|
|
|
|
.PHONY: docs
|
|
docs: docs/static/gallery.html
|
|
cd docs && hugo serve
|
|
|
|
########################################################
|
|
## Platform-specific builds
|
|
########################################################
|
|
|
|
## fyne cross-platform build
|
|
build-launcher-darwin: build-launcher
|
|
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
|
|
--appName "LocalAI" \
|
|
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
|
|
--bundleIdentifier "com.localai.launcher" \
|
|
--iconPath "core/http/static/logo.png" \
|
|
--outputDir "dist/"
|
|
|
|
build-launcher-linux:
|
|
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|