From a0317d9926e306b70f2bc22a4c4f4bd00a0d6bc3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 27 Apr 2026 23:09:20 +0000
Subject: [PATCH] refactor(tests): split app_test.go, move real-backend
 coverage to e2e-backends

core/http/app_test.go had grown to 1495 lines exercising three concerns at
once: HTTP-layer integration, real-backend inference (llama-gguf, tts,
stablediffusion, transformers embeddings, whisper), and service logic that
already has unit-level coverage. Each PR paid for 6 backend builds plus
real-model downloads to satisfy a single suite.

Reorg per layer:

- app_test.go (1495 -> 1003 lines) drives the mock-backend binary only.
  Kept: auth, routing, gallery API, file:// import, /system, agent-jobs
  HTTP plumbing, config-file model loading. Deleted real-inference specs
  (llama-gguf chat, ggml completions/streaming, logprobs, logit_bias,
  transcription, embeddings, External-gRPC, Stores duplicate, Model
  gallery Context). Lifted Agent Jobs out of the deleted Stores Context.
- tests/e2e-backends/backend_test.go gains logprobs, logit_bias, and
  no-first-token-dup specs (the latter folded into PredictStream). Two new
  caps gate them so non-LLM backends opt out.
- tests/e2e-aio/e2e_test.go gains a streaming smoke under Context("text")
  to catch container-level streaming regressions.
- tests/models_fixtures/ removed; all fixtures referenced testmodel.ggml.
  app_test.go now writes per-Context inline mock-model YAMLs.

CI:

- test.yml + tests-e2e.yml gain paths-ignore (docs/, examples/, *.md,
  backend/) so docs-only and backend-only PRs skip them. test.yml drops
  the 6-backend Build step plus TRANSFORMER_BACKEND/GO_TAGS=tts;
  tests-apple drops the llama-cpp-darwin build.
- New tests-aio.yml runs the AIO container nightly + on workflow_dispatch
  + master/tags. The tests-e2e-container job moved out of test.yml so PRs
  no longer pay the AIO cost.
- New tests-llama-cpp-smoke job in test-extra.yml runs on every PR with no
  detect-changes gate; pulls
  quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp (no build on
  PR) and exercises predict/stream/logprobs/logit_bias against
  Qwen3-0.6B. This is the PR-acceptance real-backend gate now that AIO
  moved to nightly. The path-gated heavy test-extra-backend-llama-cpp
  wrapper appends the same caps so it exercises the moved specs when the
  backend actually changes.

Makefile:

- Deleted test-models/testmodel.ggml (the wget chain), test-llama-gguf,
  test-tts, test-stablediffusion, test-realtime-models. The test target
  drops --label-filter, HUGGINGFACE_GRPC, TRANSFORMER_BACKEND, TEST_DIR,
  FIXTURES, CONFIG_FILE, MODELS_PATH, BACKENDS_PATH, and now depends on
  build-mock-backend. test-stores keeps a focused entry point and depends
  on backends/local-store. clean-tests also clears the mock-backend
  binary.

Net cost per typical Go-side PR: ~25min (6 backend builds + tests + AIO)
+ ~8min e2e drops to ~5min (mock-backend test) + ~8min (e2e) + ~5-10min
(llama-cpp smoke, image pulled rather than built). Docs-only and
backend-only PRs skip the always-on workflows entirely.
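For reference, the smoke gate can be reproduced locally with the same
invocation the new test-extra.yml job uses (a sketch; assumes Docker and
network access to quay.io):

    docker pull quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp
    BACKEND_IMAGE=quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp \
    BACKEND_TEST_CAPS=health,load,predict,stream,logprobs,logit_bias \
    make test-extra-backend

The per-Context inline mock-model YAML that replaces the deleted fixtures
is minimal; each Context writes the equivalent of:

    name: mock-model
    backend: mock-backend
    parameters:
      model: mock-model.bin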
Signed-off-by: Ettore Di Giacinto
Assisted-by: claude-code:claude-opus-4-7
---
 .github/workflows/test-extra.yml          |  27 +
 .github/workflows/test.yml                |  76 +-
 .github/workflows/tests-aio.yml           |  86 +++
 .github/workflows/tests-e2e.yml           |   6 +
 Makefile                                  |  67 +-
 core/http/app_test.go                     | 831 +++++-----------------
 core/http/http_suite_test.go              |  42 ++
 tests/e2e-aio/e2e_test.go                 |  28 +
 tests/e2e-backends/backend_test.go        |  62 ++
 tests/models_fixtures/completion.tmpl     |   1 -
 tests/models_fixtures/config.yaml         |  32 -
 tests/models_fixtures/embeddings.yaml     |   4 -
 tests/models_fixtures/ggml-gpt4all-j.tmpl |   4 -
 tests/models_fixtures/gpt4.yaml           |  16 -
 tests/models_fixtures/gpt4_2.yaml         |  16 -
 tests/models_fixtures/grpc.yaml           |   5 -
 tests/models_fixtures/rwkv.yaml           |  24 -
 tests/models_fixtures/whisper.yaml        |   4 -
 18 files changed, 451 insertions(+), 880 deletions(-)
 create mode 100644 .github/workflows/tests-aio.yml
 delete mode 100644 tests/models_fixtures/completion.tmpl
 delete mode 100644 tests/models_fixtures/config.yaml
 delete mode 100644 tests/models_fixtures/embeddings.yaml
 delete mode 100644 tests/models_fixtures/ggml-gpt4all-j.tmpl
 delete mode 100644 tests/models_fixtures/gpt4.yaml
 delete mode 100644 tests/models_fixtures/gpt4_2.yaml
 delete mode 100644 tests/models_fixtures/grpc.yaml
 delete mode 100644 tests/models_fixtures/rwkv.yaml
 delete mode 100644 tests/models_fixtures/whisper.yaml

diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 67ab16938..d37f77e01 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -507,6 +507,33 @@ jobs:
       - name: Build llama-cpp backend image and run audio transcription gRPC e2e tests
        run: |
          make test-extra-backend-llama-cpp-transcription
+  # PR-acceptance smoke gate: always runs on every PR (no detect-changes gate,
+  # no paths filter). Pulls the pre-built master CPU llama-cpp image from quay
+  # instead of building from source, so the cost is a docker pull (~30s) plus
+  # the short Qwen3-0.6B model download. Exercises the full gRPC surface
+  # (health, load, predict, stream) plus the logprobs/logit_bias specs that
+  # moved out of core/http/app_test.go. Anything heavier or per-backend is
+  # gated to the detect-changes path-filter above.
+  tests-llama-cpp-smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          submodules: true
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.25.4'
+      - name: Pull pre-built llama-cpp backend image
+        run: docker pull quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp
+      - name: Run e2e-backends smoke
+        env:
+          BACKEND_IMAGE: quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp
+          BACKEND_TEST_CAPS: health,load,predict,stream,logprobs,logit_bias
+        run: |
+          make test-extra-backend
   # Realtime e2e with sherpa-onnx driving VAD + STT + TTS against a mocked LLM.
# Builds the sherpa-onnx Docker image, extracts the rootfs so the e2e suite # can discover the backend binary + shared libs, downloads the three model diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5b104d1a9..f1c183acd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,6 +3,12 @@ name: 'tests' on: pull_request: + paths-ignore: + - 'docs/**' + - 'examples/**' + - 'README.md' + - '**/*.md' + - 'backend/**' push: branches: - master @@ -97,73 +103,9 @@ jobs: node-version: '22' - name: Build React UI run: make react-ui - - name: Build backends - run: | - make backends/transformers - mkdir external && mv backends/transformers external/transformers - make backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml - name: Test run: | - TRANSFORMER_BACKEND=$PWD/external/transformers/run.sh PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test - - name: Setup tmate session if tests fail - if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3.23 - with: - detached: true - connect-timeout-seconds: 180 - limit-access-to-actor: true - - tests-e2e-container: - runs-on: ubuntu-latest - steps: - - name: Release space from worker - run: | - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - df -h - echo - sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true - sudo apt-get remove --auto-remove android-sdk-platform-tools || true - sudo apt-get purge --auto-remove android-sdk-platform-tools || true - sudo rm -rf /usr/local/lib/android - sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true - sudo rm -rf /usr/share/dotnet - sudo apt-get remove -y '^mono-.*' || true - sudo apt-get remove -y '^ghc-.*' || true - sudo apt-get remove -y '.*jdk.*|.*jre.*' || true - sudo apt-get remove -y 'php.*' || true - sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true - sudo apt-get remove -y '^google-.*' || true - sudo apt-get remove -y azure-cli || true - sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true - sudo apt-get remove -y '^gfortran-.*' || true - sudo apt-get autoremove -y - sudo apt-get clean - echo - echo "Listing top largest packages" - pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) - head -n 30 <<< "${pkgs}" - echo - sudo rm -rfv build || true - df -h - - name: Clone - uses: actions/checkout@v6 - with: - submodules: true - - name: Dependencies - run: | - # Install protoc - curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ - unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ - rm protoc.zip - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - PATH="$PATH:$HOME/go/bin" make protogen-go - - name: Test - run: | - PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio + PATH="$PATH:/root/go/bin" make --jobs 5 --output-sync=target test - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.23 @@ -200,10 +142,6 @@ jobs: node-version: '22' - name: 
Build React UI run: make react-ui - - name: Build llama-cpp-darwin - run: | - make protogen-go - make backends/llama-cpp-darwin - name: Test run: | export C_INCLUDE_PATH=/usr/local/include diff --git a/.github/workflows/tests-aio.yml b/.github/workflows/tests-aio.yml new file mode 100644 index 000000000..7c8579266 --- /dev/null +++ b/.github/workflows/tests-aio.yml @@ -0,0 +1,86 @@ +--- +name: 'tests-aio' + +# Runs the all-in-one (AIO) Docker image with real backends + real models. +# Heavy: builds llama-cpp/whisper/piper/silero-vad/stablediffusion-ggml/local-store +# and exercises end-to-end inference inside the container. Moved out of test.yml +# (which used to run on every PR) so PR CI no longer pays this cost. +# +# Triggers: +# - schedule (nightly @ 04:00 UTC) — catches packaging/image regressions within 24h +# - workflow_dispatch — manual run on-demand +# - push to master/tags — sanity check after merge / before release + +on: + schedule: + - cron: '0 4 * * *' + workflow_dispatch: + push: + branches: + - master + tags: + - '*' + +concurrency: + group: ci-tests-aio-${{ github.head_ref || github.ref }}-${{ github.repository }} + cancel-in-progress: true + +jobs: + tests-aio: + runs-on: ubuntu-latest + steps: + - name: Release space from worker + run: | + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + df -h + echo + sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true + sudo apt-get remove --auto-remove android-sdk-platform-tools || true + sudo apt-get purge --auto-remove android-sdk-platform-tools || true + sudo rm -rf /usr/local/lib/android + sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true + sudo rm -rf /usr/share/dotnet + sudo apt-get remove -y '^mono-.*' || true + sudo apt-get remove -y '^ghc-.*' || true + sudo apt-get remove -y '.*jdk.*|.*jre.*' || true + sudo apt-get remove -y 'php.*' || true + sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true + sudo apt-get remove -y '^google-.*' || true + sudo apt-get remove -y azure-cli || true + sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true + sudo apt-get remove -y '^gfortran-.*' || true + sudo apt-get autoremove -y + sudo apt-get clean + echo + echo "Listing top largest packages" + pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr) + head -n 30 <<< "${pkgs}" + echo + sudo rm -rfv build || true + df -h + - name: Clone + uses: actions/checkout@v6 + with: + submodules: true + - name: Dependencies + run: | + # Install protoc + curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \ + unzip -j -d /usr/local/bin protoc.zip bin/protoc && \ + rm protoc.zip + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + PATH="$PATH:$HOME/go/bin" make protogen-go + - name: Test + run: | + PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio + - name: Setup tmate session if tests fail + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3.23 + with: + detached: true + connect-timeout-seconds: 180 + limit-access-to-actor: true diff --git 
a/.github/workflows/tests-e2e.yml b/.github/workflows/tests-e2e.yml index 73a9535b6..de916cc4a 100644 --- a/.github/workflows/tests-e2e.yml +++ b/.github/workflows/tests-e2e.yml @@ -3,6 +3,12 @@ name: 'E2E Backend Tests' on: pull_request: + paths-ignore: + - 'docs/**' + - 'examples/**' + - 'README.md' + - '**/*.md' + - 'backend/**' push: branches: - master diff --git a/Makefile b/Makefile index 2f3ee1dea..a76f7975a 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,7 @@ clean: ## Remove build related file clean-tests: rm -rf test-models rm -rf test-dir + rm -f tests/e2e/mock-backend/mock-backend ## Install Go tools install-go-tools: @@ -143,32 +144,24 @@ osx-signed: build run: ## run local-ai CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./ -test-models/testmodel.ggml: - mkdir -p test-models - mkdir -p test-dir - wget -q https://huggingface.co/mradermacher/gpt2-alpaca-gpt4-GGUF/resolve/main/gpt2-alpaca-gpt4.Q4_K_M.gguf -O test-models/testmodel.ggml - wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en - wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav - cp tests/models_fixtures/* test-models - -prepare-test: protogen-go - cp tests/models_fixtures/* test-models +prepare-test: protogen-go build-mock-backend ######################################################## ## Tests ######################################################## ## Test targets -test: test-models/testmodel.ggml protogen-go +## After the test-suite reorg (see plans/test-reorg) the default `make test` +## no longer downloads multi-GB GGUF/whisper fixtures or builds llama-cpp / +## transformers / piper / whisper / stablediffusion-ggml. core/http/app_test.go +## now drives the mock-backend binary built by build-mock-backend; real-backend +## inference moved into tests/e2e-backends/ (per-backend, path-filtered) and +## tests/e2e-aio/ (nightly). 
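+## Typical entry points after the reorg (for reference; all targets are
+## defined in this file):
+##   make test                          # mock-backend HTTP suite only
+##   make test-stores                   # store integration via backends/local-store
+##   make test-extra-backend-llama-cpp  # heavy real-backend run (path-gated in CI)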
+test: prepare-test @echo 'Running tests' export GO_TAGS="debug" - $(MAKE) prepare-test OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \ - HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) - $(MAKE) test-llama-gguf - $(MAKE) test-tts - $(MAKE) test-stablediffusion + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) ######################################################## ## E2E AIO tests (uses standard image with pre-configured models) @@ -235,20 +228,12 @@ teardown-e2e: ## Integration and unit tests ######################################################## -test-llama-gguf: prepare-test - TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) - -test-tts: prepare-test - TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) - -test-stablediffusion: prepare-test - TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models BACKENDS_PATH=$(abspath ./)/backends \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) - -test-stores: - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration +## Storage / vector-store integration. Requires the local-store backend to +## be available — we build it on demand and pass its location via +## BACKENDS_PATH (the model loader looks there for the gRPC binary). +test-stores: backends/local-store + BACKENDS_PATH=$(abspath ./)/backends \ + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r tests/integration test-opus: @echo 'Running opus backend tests' @@ -269,23 +254,13 @@ test-realtime: build-mock-backend @echo 'Running realtime e2e tests (mock backend)' $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime && !real-models" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e -# Real-model realtime tests. Set REALTIME_TEST_MODEL to use your own pipeline, -# or leave unset to auto-build one from the component env vars below. +# Container-based real-model realtime testing. Build env vars / pipeline +# definition kept here so test-realtime-models-docker can drive a fully wired +# pipeline (VAD + STT + LLM + TTS) from inside a containerised runner. 
REALTIME_VAD?=silero-vad-ggml REALTIME_STT?=whisper-1 REALTIME_LLM?=qwen3-0.6b REALTIME_TTS?=tts-1 -REALTIME_BACKENDS_PATH?=$(abspath ./)/backends - -test-realtime-models: build-mock-backend - @echo 'Running realtime e2e tests (real models)' - REALTIME_TEST_MODEL=$${REALTIME_TEST_MODEL:-realtime-test-pipeline} \ - REALTIME_VAD=$(REALTIME_VAD) \ - REALTIME_STT=$(REALTIME_STT) \ - REALTIME_LLM=$(REALTIME_LLM) \ - REALTIME_TTS=$(REALTIME_TTS) \ - REALTIME_BACKENDS_PATH=$(REALTIME_BACKENDS_PATH) \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="Realtime" --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e # --- Container-based real-model testing --- @@ -528,7 +503,9 @@ test-extra-backend: protogen-go ## Convenience wrappers: build the image, then exercise it. test-extra-backend-llama-cpp: docker-build-llama-cpp - BACKEND_IMAGE=local-ai-backend:llama-cpp $(MAKE) test-extra-backend + BACKEND_IMAGE=local-ai-backend:llama-cpp \ + BACKEND_TEST_CAPS=health,load,predict,stream,logprobs,logit_bias \ + $(MAKE) test-extra-backend test-extra-backend-ik-llama-cpp: docker-build-ik-llama-cpp BACKEND_IMAGE=local-ai-backend:ik-llama-cpp $(MAKE) test-extra-backend diff --git a/core/http/app_test.go b/core/http/app_test.go index 74220855c..31c6c5a55 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -9,7 +9,6 @@ import ( "net/http" "os" "path/filepath" - "runtime" "time" "github.com/mudler/LocalAI/core/application" @@ -28,7 +27,6 @@ import ( "github.com/mudler/xlog" openaigo "github.com/otiai10/openaigo" "github.com/sashabaranov/go-openai" - "github.com/sashabaranov/go-openai/jsonschema" ) const apiKey = "joshua" @@ -322,7 +320,9 @@ var _ = Describe("API test", func() { tmpdir, err = os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) - backendPath := os.Getenv("BACKENDS_PATH") + // No real backends needed — these specs cover gallery API, auth, + // routing, and file:// import. Use the suite-level empty backend dir. + backendPath := backendDir modelDir = filepath.Join(tmpdir, "models") err = os.Mkdir(modelDir, 0750) @@ -671,258 +671,42 @@ parameters: }) }) - Context("Model gallery", func() { + Context("API query", func() { BeforeEach(func() { - var err error - tmpdir, err = os.MkdirTemp("", "") - - backendPath := os.Getenv("BACKENDS_PATH") - - Expect(err).ToNot(HaveOccurred()) - modelDir = filepath.Join(tmpdir, "models") - backendAssetsDir := filepath.Join(tmpdir, "backend-assets") - err = os.Mkdir(backendAssetsDir, 0750) - Expect(err).ToNot(HaveOccurred()) - + if mockBackendPath == "" { + Skip("mock-backend binary not built; run 'make build-mock-backend'") + } c, cancel = context.WithCancel(context.Background()) - galleries := []config.Gallery{ - { - Name: "localai", - URL: "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml", - }, - } + // Stand up an isolated model dir for this Context so the suite can + // register a mock-model config (read by /v1/models, /system, and the + // agent-jobs flow) without depending on real backend builds. 
+ var err error + tmpdir, err = os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + modelDir = filepath.Join(tmpdir, "models") + Expect(os.Mkdir(modelDir, 0750)).To(Succeed()) + + mockModelYAML := `name: mock-model +backend: mock-backend +parameters: + model: mock-model.bin +` + Expect(os.WriteFile(filepath.Join(modelDir, "mock-model.yaml"), []byte(mockModelYAML), 0644)).To(Succeed()) systemState, err := system.GetSystemState( - system.WithBackendPath(backendPath), + system.WithBackendPath(backendDir), system.WithModelPath(modelDir), ) Expect(err).ToNot(HaveOccurred()) application, err := application.New( append(commonOpts, - config.WithContext(c), - config.WithGeneratedContentDir(tmpdir), - config.WithSystemState(systemState), - config.WithGalleries(galleries), - )..., - ) - Expect(err).ToNot(HaveOccurred()) - app, err = API(application) - Expect(err).ToNot(HaveOccurred()) - - go func() { - if err := app.Start("127.0.0.1:9090"); err != nil && err != http.ErrServerClosed { - xlog.Error("server error", "error", err) - } - }() - - defaultConfig := openai.DefaultConfig("") - defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" - - client2 = openaigo.NewClient("") - client2.BaseURL = defaultConfig.BaseURL - - // Wait for API to be ready - client = openai.NewClientWithConfig(defaultConfig) - Eventually(func() error { - _, err := client.ListModels(context.TODO()) - return err - }, "2m").ShouldNot(HaveOccurred()) - }) - - AfterEach(func() { - cancel() - if app != nil { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - err := app.Shutdown(ctx) - Expect(err).ToNot(HaveOccurred()) - } - err := os.RemoveAll(tmpdir) - Expect(err).ToNot(HaveOccurred()) - _, err = os.ReadDir(tmpdir) - Expect(err).To(HaveOccurred()) - }) - - It("runs gguf models (chat)", Label("llama-gguf"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - modelName := "qwen3-1.7b" - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ID: "localai@" + modelName, - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "900s", "10s").Should(Equal(true)) - - By("testing chat") - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{ - { - Role: "user", - Content: "How much is 2+2?", - }, - }}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four"))) - - By("testing functions") - resp2, err := client.CreateChatCompletion( - context.TODO(), - openai.ChatCompletionRequest{ - Model: modelName, - Messages: []openai.ChatCompletionMessage{ - { - Role: "user", - Content: "What is the weather like in San Francisco (celsius)?", - }, - }, - Functions: []openai.FunctionDefinition{ - openai.FunctionDefinition{ - Name: "get_current_weather", - Description: "Get the current weather", - Parameters: jsonschema.Definition{ - Type: jsonschema.Object, - Properties: map[string]jsonschema.Definition{ - "location": { - Type: jsonschema.String, - Description: "The city and state, e.g. 
San Francisco, CA", - }, - "unit": { - Type: jsonschema.String, - Enum: []string{"celcius", "fahrenheit"}, - }, - }, - Required: []string{"location"}, - }, - }, - }, - }) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) - - var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) - Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) - Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) - Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) - }) - - It("installs and is capable to run tts", Label("tts"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ID: "localai@voice-en-us-kathleen-low", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - fmt.Println(response) - return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) - - // An HTTP Post to the /tts endpoint should return a wav audio file - resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "voice-en-us-kathleen-low"}`))) - Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) - dat, err := io.ReadAll(resp.Body) - Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) - - Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat))) - Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/wav"), Equal("audio/vnd.wave"))) - }) - It("installs and is capable to generate images", Label("stablediffusion"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ID: "localai@sd-1.5-ggml", - Name: "stablediffusion", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - fmt.Println(response) - return response["processed"].(bool) - }, "1200s", "10s").Should(Equal(true)) - - resp, err := http.Post( - "http://127.0.0.1:9090/v1/images/generations", - "application/json", - bytes.NewBuffer([]byte(`{ - "prompt": "a lovely cat", - "step": 1, "seed":9000, - "size": "256x256", "n":2}`))) - // The response should contain an URL - Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) - dat, err := io.ReadAll(resp.Body) - Expect(err).ToNot(HaveOccurred(), "error reading /image/generations response") - - imgUrlResp := &schema.OpenAIResponse{} - err = json.Unmarshal(dat, imgUrlResp) - Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat)) - Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero())) - imgUrl := imgUrlResp.Data[0].URL - Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl) - Expect(imgUrl).To(ContainSubstring(".png"), imgUrl) - - imgResp, err := http.Get(imgUrl) - Expect(err).To(BeNil()) - Expect(imgResp).ToNot(BeNil()) - 
Expect(imgResp.StatusCode).To(Equal(200)) - Expect(imgResp.ContentLength).To(BeNumerically(">", 0)) - imgData := make([]byte, 512) - count, err := io.ReadFull(imgResp.Body, imgData) - Expect(err).To(Or(BeNil(), MatchError(io.EOF))) - Expect(count).To(BeNumerically(">", 0)) - Expect(count).To(BeNumerically("<=", 512)) - Expect(http.DetectContentType(imgData)).To(Equal("image/png")) - }) - }) - - Context("API query", func() { - BeforeEach(func() { - modelPath := os.Getenv("MODELS_PATH") - backendPath := os.Getenv("BACKENDS_PATH") - c, cancel = context.WithCancel(context.Background()) - - var err error - - systemState, err := system.GetSystemState( - system.WithBackendPath(backendPath), - system.WithModelPath(modelPath), - ) - Expect(err).ToNot(HaveOccurred()) - - application, err := application.New( - append(commonOpts, - config.WithExternalBackend("transformers", os.Getenv("TRANSFORMER_BACKEND")), config.WithContext(c), config.WithSystemState(systemState), )...) Expect(err).ToNot(HaveOccurred()) + application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath) app, err = API(application) Expect(err).ToNot(HaveOccurred()) go func() { @@ -952,149 +736,12 @@ parameters: err := app.Shutdown(ctx) Expect(err).ToNot(HaveOccurred()) } + Expect(os.RemoveAll(tmpdir)).To(Succeed()) }) It("returns the models list", func() { models, err := client.ListModels(context.TODO()) Expect(err).ToNot(HaveOccurred()) - Expect(len(models.Models)).To(BeNumerically(">=", 7)) - }) - It("can generate completions via ggml", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Text).ToNot(BeEmpty()) - }) - - It("can generate chat completions via ggml", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) - }) - - It("does not duplicate the first content token in streaming chat completions", Label("llama-gguf", "llama-gguf-stream"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{ - Model: "testmodel.ggml", - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}, - }) - Expect(err).ToNot(HaveOccurred()) - defer stream.Close() - - var contentParts []string - for { - chunk, err := stream.Recv() - if err == io.EOF { - break - } - Expect(err).ToNot(HaveOccurred()) - if len(chunk.Choices) > 0 { - delta := chunk.Choices[0].Delta.Content - if delta != "" { - contentParts = append(contentParts, delta) - } - } - } - - Expect(contentParts).ToNot(BeEmpty(), "Expected streaming content tokens") - // The first content token should appear exactly once. - // A bug in grpc-server.cpp caused the role-init array element - // to get the same ChatDelta stamped, duplicating the first token. 
- if len(contentParts) >= 2 { - Expect(contentParts[0]).ToNot(Equal(contentParts[1]), - "First content token was duplicated: %v", contentParts[:2]) - } - }) - - It("returns logprobs in chat completions when requested", func() { - if runtime.GOOS != "linux" { - Skip("test only on linux") - } - topLogprobsVal := 3 - response, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{ - Model: "testmodel.ggml", - LogProbs: true, - TopLogProbs: topLogprobsVal, - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) - Expect(err).ToNot(HaveOccurred()) - - Expect(len(response.Choices)).To(Equal(1)) - Expect(response.Choices[0].Message).ToNot(BeNil()) - Expect(response.Choices[0].Message.Content).ToNot(BeEmpty()) - - // Verify logprobs are present and have correct structure - Expect(response.Choices[0].LogProbs).ToNot(BeNil()) - Expect(response.Choices[0].LogProbs.Content).ToNot(BeEmpty()) - - Expect(len(response.Choices[0].LogProbs.Content)).To(BeNumerically(">", 1)) - - foundatLeastToken := "" - foundAtLeastBytes := []byte{} - foundAtLeastTopLogprobBytes := []byte{} - foundatLeastTopLogprob := "" - // Verify logprobs content structure matches OpenAI format - for _, logprobContent := range response.Choices[0].LogProbs.Content { - // Bytes can be empty for certain tokens (special tokens, etc.), so we don't require it - if len(logprobContent.Bytes) > 0 { - foundAtLeastBytes = logprobContent.Bytes - } - if len(logprobContent.Token) > 0 { - foundatLeastToken = logprobContent.Token - } - Expect(logprobContent.LogProb).To(BeNumerically("<=", 0)) // Logprobs are always <= 0 - Expect(len(logprobContent.TopLogProbs)).To(BeNumerically(">", 1)) - - // If top_logprobs is requested, verify top_logprobs array respects the limit - if len(logprobContent.TopLogProbs) > 0 { - // Should respect top_logprobs limit (3 in this test) - Expect(len(logprobContent.TopLogProbs)).To(BeNumerically("<=", topLogprobsVal)) - for _, topLogprob := range logprobContent.TopLogProbs { - if len(topLogprob.Bytes) > 0 { - foundAtLeastTopLogprobBytes = topLogprob.Bytes - } - if len(topLogprob.Token) > 0 { - foundatLeastTopLogprob = topLogprob.Token - } - Expect(topLogprob.LogProb).To(BeNumerically("<=", 0)) - } - } - } - - Expect(foundAtLeastBytes).ToNot(BeEmpty()) - Expect(foundAtLeastTopLogprobBytes).ToNot(BeEmpty()) - Expect(foundatLeastToken).ToNot(BeEmpty()) - Expect(foundatLeastTopLogprob).ToNot(BeEmpty()) - }) - - It("applies logit_bias to chat completions when requested", func() { - if runtime.GOOS != "linux" { - Skip("test only on linux") - } - // logit_bias is a map of token IDs (as strings) to bias values (-100 to 100) - // According to OpenAI API: modifies the likelihood of specified tokens appearing in the completion - logitBias := map[string]int{ - "15043": 1, // Bias token ID 15043 (example token ID) with bias value 1 - } - response, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{ - Model: "testmodel.ggml", - Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}, - LogitBias: logitBias, - }) - Expect(err).ToNot(HaveOccurred()) - Expect(len(response.Choices)).To(Equal(1)) - Expect(response.Choices[0].Message).ToNot(BeNil()) - Expect(response.Choices[0].Message.Content).ToNot(BeEmpty()) - // If logit_bias is applied, the response should be generated successfully - // We can't easily verify the bias effect without knowing the actual token IDs for the model, - // but the fact that the request succeeds confirms the API 
accepts and processes logit_bias + Expect(len(models.Models)).To(BeNumerically(">=", 1)) }) It("returns errors", func() { @@ -1103,331 +750,193 @@ parameters: Expect(err.Error()).To(ContainSubstring("error, status code: 404, status: 404 Not Found")) }) - It("shows the external backend", func() { - // Only run on linux - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - // do an http request to the /system endpoint + It("shows the external backend on /system", func() { + // /system reports the backends available to the application. + // Mock-backend is registered via SetExternalBackend so it appears + // alongside any built-in entries; verifying that string proves the + // endpoint is wired up regardless of which real backends exist. resp, err := http.Get("http://127.0.0.1:9090/system") Expect(err).ToNot(HaveOccurred()) Expect(resp.StatusCode).To(Equal(200)) dat, err := io.ReadAll(resp.Body) Expect(err).ToNot(HaveOccurred()) - Expect(string(dat)).To(ContainSubstring("llama-cpp")) + Expect(string(dat)).To(ContainSubstring("mock-backend")) }) - It("transcribes audio", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - resp, err := client.CreateTranscription( - context.Background(), - openai.AudioRequest{ - Model: openai.Whisper1, - FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"), - }, - ) - Expect(err).ToNot(HaveOccurred()) - Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting")) - }) - - It("calculate embeddings", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - embeddingModel := openai.AdaEmbeddingV2 - resp, err := client.CreateEmbeddings( - context.Background(), - openai.EmbeddingRequest{ - Model: embeddingModel, - Input: []string{"sun", "cat"}, - }, - ) - Expect(err).ToNot(HaveOccurred(), err) - Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 4096)) - Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 4096)) - - sunEmbedding := resp.Data[0].Embedding - resp2, err := client.CreateEmbeddings( - context.Background(), - openai.EmbeddingRequest{ - Model: embeddingModel, - Input: []string{"sun"}, - }, - ) - Expect(err).ToNot(HaveOccurred()) - Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding)) - Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding)) - - resp3, err := client.CreateEmbeddings( - context.Background(), - openai.EmbeddingRequest{ - Model: embeddingModel, - Input: []string{"cat"}, - }, - ) - Expect(err).ToNot(HaveOccurred()) - Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[1].Embedding)) - Expect(resp3.Data[0].Embedding).ToNot(Equal(sunEmbedding)) - }) - - Context("External gRPC calls", func() { - It("calculate embeddings with sentencetransformers", func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") + // Agent Jobs: HTTP API for task/job scheduling. The underlying AgentPool + // service is exercised in core/services/agentpool/agent_jobs_test.go; + // these specs cover the /api/agent/* HTTP plumbing on top. 
+ Context("Agent Jobs", func() { + It("creates and manages tasks", func() { + // Create a task + taskBody := map[string]any{ + "name": "Test Task", + "description": "Test Description", + "model": "mock-model", + "prompt": "Hello {{.name}}", + "enabled": true, } - resp, err := client.CreateEmbeddings( - context.Background(), - openai.EmbeddingRequest{ - Model: openai.AdaCodeSearchCode, - Input: []string{"sun", "cat"}, - }, - ) + + var createResp map[string]any + err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384)) - Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384)) + Expect(createResp["id"]).ToNot(BeEmpty()) + taskID := createResp["id"].(string) - sunEmbedding := resp.Data[0].Embedding - resp2, err := client.CreateEmbeddings( - context.Background(), - openai.EmbeddingRequest{ - Model: openai.AdaCodeSearchCode, - Input: []string{"sun"}, - }, - ) + // Get the task + var task schema.Task + resp, err := http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID) Expect(err).ToNot(HaveOccurred()) - Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding)) - Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding)) - }) - }) + Expect(resp.StatusCode).To(Equal(200)) + body, _ := io.ReadAll(resp.Body) + json.Unmarshal(body, &task) + Expect(task.Name).To(Equal("Test Task")) - // See tests/integration/stores_test - Context("Stores", Label("stores"), func() { + // List tasks + resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks") + Expect(err).ToNot(HaveOccurred()) + Expect(resp.StatusCode).To(Equal(200)) + var tasks []schema.Task + body, _ = io.ReadAll(resp.Body) + json.Unmarshal(body, &tasks) + Expect(len(tasks)).To(BeNumerically(">=", 1)) - BeforeEach(func() { - // Only run on linux - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } + // Update task + taskBody["name"] = "Updated Task" + err = putRequestJSON("http://127.0.0.1:9090/api/agent/tasks/"+taskID, &taskBody) + Expect(err).ToNot(HaveOccurred()) + + // Verify update + resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID) + Expect(err).ToNot(HaveOccurred()) + body, _ = io.ReadAll(resp.Body) + json.Unmarshal(body, &task) + Expect(task.Name).To(Equal("Updated Task")) + + // Delete task + req, _ := http.NewRequest("DELETE", "http://127.0.0.1:9090/api/agent/tasks/"+taskID, nil) + req.Header.Set("Authorization", bearerKey) + resp, err = http.DefaultClient.Do(req) + Expect(err).ToNot(HaveOccurred()) + Expect(resp.StatusCode).To(Equal(200)) }) - It("sets, gets, finds and deletes entries", func() { - ks := [][]float32{ - {0.1, 0.2, 0.3}, - {0.4, 0.5, 0.6}, - {0.7, 0.8, 0.9}, - } - vs := []string{ - "test1", - "test2", - "test3", - } - setBody := schema.StoresSet{ - Keys: ks, - Values: vs, + It("executes and monitors jobs", func() { + // Create a task first + taskBody := map[string]any{ + "name": "Job Test Task", + "model": "mock-model", + "prompt": "Say hello", + "enabled": true, } - url := "http://127.0.0.1:9090/stores/" - err := postRequestJSON(url+"set", &setBody) + var createResp map[string]any + err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) Expect(err).ToNot(HaveOccurred()) + taskID := createResp["id"].(string) - getBody := schema.StoresGet{ - Keys: ks, + // Execute a job + jobBody := map[string]any{ + "task_id": taskID, + "parameters": map[string]string{}, } - var getRespBody 
schema.StoresGetResponse - err = postRequestResponseJSON(url+"get", &getBody, &getRespBody) + + var jobResp schema.JobExecutionResponse + err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/jobs/execute", &jobBody, &jobResp) Expect(err).ToNot(HaveOccurred()) - Expect(len(getRespBody.Keys)).To(Equal(len(ks))) + Expect(jobResp.JobID).ToNot(BeEmpty()) + jobID := jobResp.JobID - for i, v := range getRespBody.Keys { - if v[0] == 0.1 { - Expect(getRespBody.Values[i]).To(Equal("test1")) - } else if v[0] == 0.4 { - Expect(getRespBody.Values[i]).To(Equal("test2")) - } else { - Expect(getRespBody.Values[i]).To(Equal("test3")) - } - } - - deleteBody := schema.StoresDelete{ - Keys: [][]float32{ - {0.1, 0.2, 0.3}, - }, - } - err = postRequestJSON(url+"delete", &deleteBody) + // Get job status + var job schema.Job + resp, err := http.Get("http://127.0.0.1:9090/api/agent/jobs/" + jobID) Expect(err).ToNot(HaveOccurred()) + Expect(resp.StatusCode).To(Equal(200)) + body, _ := io.ReadAll(resp.Body) + json.Unmarshal(body, &job) + Expect(job.ID).To(Equal(jobID)) + Expect(job.TaskID).To(Equal(taskID)) - findBody := schema.StoresFind{ - Key: []float32{0.1, 0.3, 0.7}, - Topk: 10, - } - - var findRespBody schema.StoresFindResponse - err = postRequestResponseJSON(url+"find", &findBody, &findRespBody) + // List jobs + resp, err = http.Get("http://127.0.0.1:9090/api/agent/jobs") Expect(err).ToNot(HaveOccurred()) - Expect(len(findRespBody.Keys)).To(Equal(2)) + Expect(resp.StatusCode).To(Equal(200)) + var jobs []schema.Job + body, _ = io.ReadAll(resp.Body) + json.Unmarshal(body, &jobs) + Expect(len(jobs)).To(BeNumerically(">=", 1)) - for i, v := range findRespBody.Keys { - if v[0] == 0.4 { - Expect(findRespBody.Values[i]).To(Equal("test2")) - } else { - Expect(findRespBody.Values[i]).To(Equal("test3")) - } - - Expect(findRespBody.Similarities[i]).To(BeNumerically(">=", -1)) - Expect(findRespBody.Similarities[i]).To(BeNumerically("<=", 1)) - } - }) - - Context("Agent Jobs", Label("agent-jobs"), func() { - It("creates and manages tasks", func() { - // Create a task - taskBody := map[string]any{ - "name": "Test Task", - "description": "Test Description", - "model": "testmodel.ggml", - "prompt": "Hello {{.name}}", - "enabled": true, - } - - var createResp map[string]any - err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) - Expect(err).ToNot(HaveOccurred()) - Expect(createResp["id"]).ToNot(BeEmpty()) - taskID := createResp["id"].(string) - - // Get the task - var task schema.Task - resp, err := http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID) - Expect(err).ToNot(HaveOccurred()) - Expect(resp.StatusCode).To(Equal(200)) - body, _ := io.ReadAll(resp.Body) - json.Unmarshal(body, &task) - Expect(task.Name).To(Equal("Test Task")) - - // List tasks - resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks") - Expect(err).ToNot(HaveOccurred()) - Expect(resp.StatusCode).To(Equal(200)) - var tasks []schema.Task - body, _ = io.ReadAll(resp.Body) - json.Unmarshal(body, &tasks) - Expect(len(tasks)).To(BeNumerically(">=", 1)) - - // Update task - taskBody["name"] = "Updated Task" - err = putRequestJSON("http://127.0.0.1:9090/api/agent/tasks/"+taskID, &taskBody) - Expect(err).ToNot(HaveOccurred()) - - // Verify update - resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID) - Expect(err).ToNot(HaveOccurred()) - body, _ = io.ReadAll(resp.Body) - json.Unmarshal(body, &task) - Expect(task.Name).To(Equal("Updated Task")) - - // Delete task - req, _ := 
http.NewRequest("DELETE", "http://127.0.0.1:9090/api/agent/tasks/"+taskID, nil) + // Cancel job (if still pending/running) + if job.Status == schema.JobStatusPending || job.Status == schema.JobStatusRunning { + req, _ := http.NewRequest("POST", "http://127.0.0.1:9090/api/agent/jobs/"+jobID+"/cancel", nil) req.Header.Set("Authorization", bearerKey) resp, err = http.DefaultClient.Do(req) Expect(err).ToNot(HaveOccurred()) Expect(resp.StatusCode).To(Equal(200)) - }) + } + }) - It("executes and monitors jobs", func() { - // Create a task first - taskBody := map[string]any{ - "name": "Job Test Task", - "model": "testmodel.ggml", - "prompt": "Say hello", - "enabled": true, - } + It("executes task by name", func() { + // Create a task with a specific name + taskBody := map[string]any{ + "name": "Named Task", + "model": "mock-model", + "prompt": "Hello", + "enabled": true, + } - var createResp map[string]any - err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) - Expect(err).ToNot(HaveOccurred()) - taskID := createResp["id"].(string) + var createResp map[string]any + err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) + Expect(err).ToNot(HaveOccurred()) - // Execute a job - jobBody := map[string]any{ - "task_id": taskID, - "parameters": map[string]string{}, - } - - var jobResp schema.JobExecutionResponse - err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/jobs/execute", &jobBody, &jobResp) - Expect(err).ToNot(HaveOccurred()) - Expect(jobResp.JobID).ToNot(BeEmpty()) - jobID := jobResp.JobID - - // Get job status - var job schema.Job - resp, err := http.Get("http://127.0.0.1:9090/api/agent/jobs/" + jobID) - Expect(err).ToNot(HaveOccurred()) - Expect(resp.StatusCode).To(Equal(200)) - body, _ := io.ReadAll(resp.Body) - json.Unmarshal(body, &job) - Expect(job.ID).To(Equal(jobID)) - Expect(job.TaskID).To(Equal(taskID)) - - // List jobs - resp, err = http.Get("http://127.0.0.1:9090/api/agent/jobs") - Expect(err).ToNot(HaveOccurred()) - Expect(resp.StatusCode).To(Equal(200)) - var jobs []schema.Job - body, _ = io.ReadAll(resp.Body) - json.Unmarshal(body, &jobs) - Expect(len(jobs)).To(BeNumerically(">=", 1)) - - // Cancel job (if still pending/running) - if job.Status == schema.JobStatusPending || job.Status == schema.JobStatusRunning { - req, _ := http.NewRequest("POST", "http://127.0.0.1:9090/api/agent/jobs/"+jobID+"/cancel", nil) - req.Header.Set("Authorization", bearerKey) - resp, err = http.DefaultClient.Do(req) - Expect(err).ToNot(HaveOccurred()) - Expect(resp.StatusCode).To(Equal(200)) - } - }) - - It("executes task by name", func() { - // Create a task with a specific name - taskBody := map[string]any{ - "name": "Named Task", - "model": "testmodel.ggml", - "prompt": "Hello", - "enabled": true, - } - - var createResp map[string]any - err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp) - Expect(err).ToNot(HaveOccurred()) - - // Execute by name - paramsBody := map[string]string{"param1": "value1"} - var jobResp schema.JobExecutionResponse - err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks/Named Task/execute", ¶msBody, &jobResp) - Expect(err).ToNot(HaveOccurred()) - Expect(jobResp.JobID).ToNot(BeEmpty()) - }) + // Execute by name + paramsBody := map[string]string{"param1": "value1"} + var jobResp schema.JobExecutionResponse + err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks/Named Task/execute", ¶msBody, &jobResp) + 
Expect(err).ToNot(HaveOccurred()) + Expect(jobResp.JobID).ToNot(BeEmpty()) }) }) }) + // Config file Context: exercises the path where models are loaded from a + // single multi-entry YAML (config_file option) rather than per-model YAMLs + // in the model dir. The fixtures point at mock-backend so this is a + // plumbing test for config-file loading and routing, not a real-inference + // test. Context("Config file", func() { BeforeEach(func() { - if runtime.GOOS != "linux" { - Skip("run this test only on linux") + if mockBackendPath == "" { + Skip("mock-backend binary not built; run 'make build-mock-backend'") } - modelPath := os.Getenv("MODELS_PATH") - backendPath := os.Getenv("BACKENDS_PATH") c, cancel = context.WithCancel(context.Background()) var err error + tmpdir, err = os.MkdirTemp("", "") + Expect(err).ToNot(HaveOccurred()) + modelDir = filepath.Join(tmpdir, "models") + Expect(os.Mkdir(modelDir, 0750)).To(Succeed()) + + // Inline config file with two list entries that both resolve to mock-backend. + // Mirrors the legacy testmodel.ggml shape so the test still proves that + // config-file loading registers each entry as a routable model. + configContent := `- name: list1 + parameters: + model: mock-model.bin + backend: mock-backend + context_size: 200 +- name: list2 + parameters: + model: mock-model.bin + backend: mock-backend + context_size: 200 +` + configFile := filepath.Join(tmpdir, "config.yaml") + Expect(os.WriteFile(configFile, []byte(configContent), 0644)).To(Succeed()) systemState, err := system.GetSystemState( - system.WithBackendPath(backendPath), - system.WithModelPath(modelPath), + system.WithBackendPath(backendDir), + system.WithModelPath(modelDir), ) Expect(err).ToNot(HaveOccurred()) @@ -1435,9 +944,10 @@ parameters: append(commonOpts, config.WithContext(c), config.WithSystemState(systemState), - config.WithConfigFile(os.Getenv("CONFIG_FILE")))..., + config.WithConfigFile(configFile))..., ) Expect(err).ToNot(HaveOccurred()) + application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath) app, err = API(application) Expect(err).ToNot(HaveOccurred()) @@ -1466,6 +976,7 @@ parameters: err := app.Shutdown(ctx) Expect(err).ToNot(HaveOccurred()) } + Expect(os.RemoveAll(tmpdir)).To(Succeed()) }) It("can generate chat completions from config file (list1)", func() { resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}}) diff --git a/core/http/http_suite_test.go b/core/http/http_suite_test.go index 805eb5b52..744c2c252 100644 --- a/core/http/http_suite_test.go +++ b/core/http/http_suite_test.go @@ -12,8 +12,41 @@ import ( var ( tmpdir string modelDir string + + // Backend directory and the mock-backend binary path. Resolved by + // findMockBackendBinary() from the same prebuilt artifact that + // tests/e2e uses, so the suite no longer needs llama-cpp / transformers + // / whisper / piper / stablediffusion-ggml builds. + backendDir string + mockBackendPath string ) +// findMockBackendBinary locates the mock-backend binary built by +// `make build-mock-backend`. Mirrors the lookup used by +// tests/e2e/e2e_suite_test.go so both suites consume the same artifact. 
+func findMockBackendBinary() (string, bool) { + candidates := []string{ + filepath.Join("..", "..", "tests", "e2e", "mock-backend", "mock-backend"), + filepath.Join("tests", "e2e", "mock-backend", "mock-backend"), + filepath.Join("..", "e2e", "mock-backend", "mock-backend"), + } + if wd, err := os.Getwd(); err == nil { + candidates = append(candidates, + filepath.Join(wd, "..", "..", "tests", "e2e", "mock-backend", "mock-backend"), + ) + } + for _, p := range candidates { + if info, err := os.Stat(p); err == nil && !info.IsDir() { + abs, absErr := filepath.Abs(p) + if absErr == nil { + return abs, true + } + return p, true + } + } + return "", false +} + func TestLocalAI(t *testing.T) { RegisterFailHandler(Fail) @@ -24,6 +57,15 @@ func TestLocalAI(t *testing.T) { err = os.Mkdir(modelDir, 0750) Expect(err).ToNot(HaveOccurred()) + backendDir = filepath.Join(tmpdir, "backends") + Expect(os.Mkdir(backendDir, 0750)).To(Succeed()) + + if p, ok := findMockBackendBinary(); ok { + mockBackendPath = p + // Make sure it's executable for the path the suite owns. + _ = os.Chmod(mockBackendPath, 0755) + } + AfterSuite(func() { err := os.RemoveAll(tmpdir) Expect(err).ToNot(HaveOccurred()) diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index cf6573f85..19b310460 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -41,6 +41,34 @@ var _ = Describe("E2E test", func() { Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) }) + + // Smoke: verifies the AIO container streams chat completions end-to-end. + // Catches packaging/proxy regressions where the streaming path breaks + // even though non-streaming works. 
+ It("streams correctly", func() { + model := "gpt-4" + stream := client.Chat.Completions.NewStreaming(context.TODO(), + openai.ChatCompletionNewParams{ + Model: model, + Messages: []openai.ChatCompletionMessageParamUnion{ + openai.UserMessage("Count to three."), + }, + }) + defer stream.Close() + + var chunks int + var combined string + for stream.Next() { + chunk := stream.Current() + if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" { + chunks++ + combined += chunk.Choices[0].Delta.Content + } + } + Expect(stream.Err()).ToNot(HaveOccurred()) + Expect(chunks).To(BeNumerically(">", 1), "expected multi-chunk stream, got %d", chunks) + Expect(combined).ToNot(BeEmpty(), "stream produced no content") + }) }) Context("function calls", func() { diff --git a/tests/e2e-backends/backend_test.go b/tests/e2e-backends/backend_test.go index 1820fceb5..cdcf1b74f 100644 --- a/tests/e2e-backends/backend_test.go +++ b/tests/e2e-backends/backend_test.go @@ -102,6 +102,8 @@ const ( capVoiceEmbed = "voice_embed" capVoiceVerify = "voice_verify" capVoiceAnalyze = "voice_analyze" + capLogprobs = "logprobs" + capLogitBias = "logit_bias" defaultPrompt = "The capital of France is" streamPrompt = "Once upon a time" @@ -422,6 +424,7 @@ var _ = Describe("Backend container", Ordered, func() { var chunks int var combined string + var firstChunks []string for { msg, err := stream.Recv() if err == io.EOF { @@ -431,12 +434,71 @@ var _ = Describe("Backend container", Ordered, func() { if len(msg.GetMessage()) > 0 { chunks++ combined += string(msg.GetMessage()) + if len(firstChunks) < 2 { + firstChunks = append(firstChunks, string(msg.GetMessage())) + } } } Expect(chunks).To(BeNumerically(">", 0), "no stream chunks received") + // Regression guard: a bug in llama-cpp's grpc-server.cpp caused the + // role-init array element to get the same ChatDelta stamped, duplicating + // the first content token. Applies to any streaming backend. + if len(firstChunks) >= 2 { + Expect(firstChunks[0]).NotTo(Equal(firstChunks[1]), + "first content token was duplicated: %v", firstChunks) + } GinkgoWriter.Printf("Stream: %d chunks, combined=%q\n", chunks, combined) }) + // Logprobs: backends that wire OpenAI-compatible logprobs return a + // JSON-encoded payload in Reply.logprobs (see backend.proto). The exact + // shape is backend-specific; we only assert that the field is populated + // when requested. Gated by capLogprobs because not every backend + // implements it. + It("returns logprobs when requested", func() { + if !caps[capLogprobs] { + Skip("logprobs capability not enabled") + } + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + res, err := client.Predict(ctx, &pb.PredictOptions{ + Prompt: prompt, + Tokens: 10, + Temperature: 0.1, + TopK: 40, + TopP: 0.9, + Logprobs: 1, + TopLogprobs: 1, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(res.GetMessage()).NotTo(BeEmpty(), "Predict produced empty output") + Expect(res.GetLogprobs()).NotTo(BeEmpty(), "Reply.logprobs was empty when requested") + GinkgoWriter.Printf("Logprobs: %d bytes\n", len(res.GetLogprobs())) + }) + + // Logit bias: encoded as a JSON string keyed by token id. We don't + // know the model's tokenizer, so we exercise the API path with a + // nonsense bias map that any backend should accept and ignore for + // unknown ids. The assertion is that the request succeeds — proving + // the LogitBias plumbing is wired end-to-end. 
+ It("accepts logit_bias when supplied", func() { + if !caps[capLogitBias] { + Skip("logit_bias capability not enabled") + } + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + res, err := client.Predict(ctx, &pb.PredictOptions{ + Prompt: prompt, + Tokens: 10, + Temperature: 0.1, + TopK: 40, + TopP: 0.9, + LogitBias: `{"1":-100}`, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(res.GetMessage()).NotTo(BeEmpty(), "Predict produced empty output with logit_bias") + }) + It("computes embeddings via Embedding", func() { if !caps[capEmbeddings] { Skip("embeddings capability not enabled") diff --git a/tests/models_fixtures/completion.tmpl b/tests/models_fixtures/completion.tmpl deleted file mode 100644 index 9867cfcd3..000000000 --- a/tests/models_fixtures/completion.tmpl +++ /dev/null @@ -1 +0,0 @@ -{{.Input}} \ No newline at end of file diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml deleted file mode 100644 index f61c2a7c0..000000000 --- a/tests/models_fixtures/config.yaml +++ /dev/null @@ -1,32 +0,0 @@ -- name: list1 - parameters: - model: testmodel.ggml - top_p: 80 - top_k: 0.9 - temperature: 0.1 - context_size: 200 - stopwords: - - "HUMAN:" - - "### Response:" - roles: - user: "HUMAN:" - system: "GPT:" - template: - completion: completion - chat: ggml-gpt4all-j -- name: list2 - parameters: - top_p: 80 - top_k: 0.9 - temperature: 0.1 - model: testmodel.ggml - context_size: 200 - stopwords: - - "HUMAN:" - - "### Response:" - roles: - user: "HUMAN:" - system: "GPT:" - template: - completion: completion - chat: ggml-gpt4all-j \ No newline at end of file diff --git a/tests/models_fixtures/embeddings.yaml b/tests/models_fixtures/embeddings.yaml deleted file mode 100644 index 76c4a56ad..000000000 --- a/tests/models_fixtures/embeddings.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: text-embedding-ada-002 -embeddings: true -parameters: - model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf \ No newline at end of file diff --git a/tests/models_fixtures/ggml-gpt4all-j.tmpl b/tests/models_fixtures/ggml-gpt4all-j.tmpl deleted file mode 100644 index f76b080ab..000000000 --- a/tests/models_fixtures/ggml-gpt4all-j.tmpl +++ /dev/null @@ -1,4 +0,0 @@ -The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response. 
-### Prompt: -{{.Input}} -### Response: diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml deleted file mode 100644 index 43e77586d..000000000 --- a/tests/models_fixtures/gpt4.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: gpt4all -parameters: - model: testmodel.ggml - top_p: 80 - top_k: 0.9 - temperature: 0.1 -context_size: 200 -stopwords: -- "HUMAN:" -- "### Response:" -roles: - user: "HUMAN:" - system: "GPT:" -template: - completion: completion - chat: ggml-gpt4all-j \ No newline at end of file diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml deleted file mode 100644 index 8a2111530..000000000 --- a/tests/models_fixtures/gpt4_2.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: gpt4all-2 -parameters: - model: testmodel.ggml - top_p: 80 - top_k: 0.9 - temperature: 0.1 -context_size: 200 -stopwords: -- "HUMAN:" -- "### Response:" -roles: - user: "HUMAN:" - system: "GPT:" -template: - completion: completion - chat: ggml-gpt4all-j \ No newline at end of file diff --git a/tests/models_fixtures/grpc.yaml b/tests/models_fixtures/grpc.yaml deleted file mode 100644 index 8c5199205..000000000 --- a/tests/models_fixtures/grpc.yaml +++ /dev/null @@ -1,5 +0,0 @@ -name: code-search-ada-code-001 -backend: sentencetransformers -embeddings: true -parameters: - model: all-MiniLM-L6-v2 \ No newline at end of file diff --git a/tests/models_fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml deleted file mode 100644 index f66cfe211..000000000 --- a/tests/models_fixtures/rwkv.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: rwkv_test -parameters: - model: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf - top_k: 80 - temperature: 0.9 - max_tokens: 4098 - top_p: 0.8 -context_size: 4098 - -roles: - user: "User: " - system: "System: " - assistant: "Assistant: " - -stopwords: -- 'Assistant:' -- '' - -template: - chat: | - {{.Input}} - Assistant: - completion: | - {{.Input}} \ No newline at end of file diff --git a/tests/models_fixtures/whisper.yaml b/tests/models_fixtures/whisper.yaml deleted file mode 100644 index 4ea99efce..000000000 --- a/tests/models_fixtures/whisper.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: whisper-1 -backend: whisper -parameters: - model: whisper-en \ No newline at end of file