refactor(tests): split app_test.go, move real-backend coverage to e2e-backends

core/http/app_test.go had grown to 1495 lines exercising three concerns at once: HTTP-layer integration, real-backend inference (llama-gguf, tts, stablediffusion, transformers embeddings, whisper), and service logic that already has unit-level coverage. Each PR paid for 6 backend builds plus real-model downloads to satisfy a single suite. Reorg per layer: - app_test.go (1495 -> 1003 lines) drives the mock-backend binary only. Kept: auth, routing, gallery API, file:// import, /system, agent-jobs HTTP plumbing, config-file model loading. Deleted real-inference specs (llama-gguf chat, ggml completions/streaming, logprobs, logit_bias, transcription, embeddings, External-gRPC, Stores duplicate, Model gallery Context). Lifted Agent Jobs out of the deleted Stores Context. - tests/e2e-backends/backend_test.go gains logprobs, logit_bias, and no-first-token-dup specs (the latter folded into PredictStream). Two new caps gate them so non-LLM backends opt out. - tests/e2e-aio/e2e_test.go gains a streaming smoke under Context("text") to catch container-level streaming regressions. - tests/models_fixtures/ removed; all fixtures referenced testmodel.ggml. app_test.go now writes per-Context inline mock-model YAMLs. CI: - test.yml + tests-e2e.yml gain paths-ignore (docs/, examples/, *.md, backend/) so docs and backend-only PRs skip them. test.yml drops the 6-backend Build step plus TRANSFORMER_BACKEND/GO_TAGS=tts; tests-apple drops the llama-cpp-darwin build. - New tests-aio.yml runs the AIO container nightly + on workflow_dispatch + master/tags. The tests-e2e-container job moved out of test.yml so PRs no longer pay AIO cost. - New tests-llama-cpp-smoke job in test-extra.yml runs on every PR with no detect-changes gate; pulls quay.io/go-skynet/local-ai-backends: master-cpu-llama-cpp (no build on PR) and exercises predict/stream/ logprobs/logit_bias against Qwen3-0.6B. This is the PR-acceptance real-backend gate after AIO moved to nightly. The path-gated heavy test-extra-backend-llama-cpp wrapper appends the same caps so it exercises the moved specs when the backend actually changes. Makefile: - Deleted test-models/testmodel.ggml (the wget chain), test-llama-gguf, test-tts, test-stablediffusion, test-realtime-models. test target drops --label-filter, HUGGINGFACE_GRPC, TRANSFORMER_BACKEND, TEST_DIR, FIXTURES, CONFIG_FILE, MODELS_PATH, BACKENDS_PATH; depends on build-mock-backend. test-stores keeps a focused entry point and depends on backends/local-store. clean-tests also clears the mock-backend binary. Net per typical Go-side PR: ~25min (6 backend builds + tests + AIO) + ~8min e2e drops to ~5min mock-backend test + ~8min e2e + ~5-10min llama-cpp-smoke (image pulled). Docs and backend-only PRs skip the always-on workflows entirely. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: claude-code:claude-opus-4-7 [Edit] [Write] [Bash]
2026-04-30 03:55:58 -04:00 · 2026-04-27 23:09:20 +00:00
parent 3948b580d2
commit a0317d9926
18 changed files with 451 additions and 880 deletions
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -9,7 +9,6 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
-	"runtime"
 	"time"

 	"github.com/mudler/LocalAI/core/application"
@@ -28,7 +27,6 @@ import (
 	"github.com/mudler/xlog"
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
-	"github.com/sashabaranov/go-openai/jsonschema"
 )

 const apiKey = "joshua"
@@ -322,7 +320,9 @@ var _ = Describe("API test", func() {
 			tmpdir, err = os.MkdirTemp("", "")
 			Expect(err).ToNot(HaveOccurred())

-			backendPath := os.Getenv("BACKENDS_PATH")
+			// No real backends needed — these specs cover gallery API, auth,
+			// routing, and file:// import. Use the suite-level empty backend dir.
+			backendPath := backendDir

 			modelDir = filepath.Join(tmpdir, "models")
 			err = os.Mkdir(modelDir, 0750)
@@ -671,258 +671,42 @@ parameters:
 		})
 	})

-	Context("Model gallery", func() {
+	Context("API query", func() {
 		BeforeEach(func() {
-			var err error
-			tmpdir, err = os.MkdirTemp("", "")
-
-			backendPath := os.Getenv("BACKENDS_PATH")
-
-			Expect(err).ToNot(HaveOccurred())
-			modelDir = filepath.Join(tmpdir, "models")
-			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-			err = os.Mkdir(backendAssetsDir, 0750)
-			Expect(err).ToNot(HaveOccurred())
-
+			if mockBackendPath == "" {
+				Skip("mock-backend binary not built; run 'make build-mock-backend'")
+			}
 			c, cancel = context.WithCancel(context.Background())

-			galleries := []config.Gallery{
-				{
-					Name: "localai",
-					URL:  "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
-				},
-			}
+			// Stand up an isolated model dir for this Context so the suite can
+			// register a mock-model config (read by /v1/models, /system, and the
+			// agent-jobs flow) without depending on real backend builds.
+			var err error
+			tmpdir, err = os.MkdirTemp("", "")
+			Expect(err).ToNot(HaveOccurred())
+			modelDir = filepath.Join(tmpdir, "models")
+			Expect(os.Mkdir(modelDir, 0750)).To(Succeed())
+
+			mockModelYAML := `name: mock-model
+backend: mock-backend
+parameters:
+  model: mock-model.bin
+`
+			Expect(os.WriteFile(filepath.Join(modelDir, "mock-model.yaml"), []byte(mockModelYAML), 0644)).To(Succeed())

 			systemState, err := system.GetSystemState(
-				system.WithBackendPath(backendPath),
+				system.WithBackendPath(backendDir),
 				system.WithModelPath(modelDir),
 			)
 			Expect(err).ToNot(HaveOccurred())

 			application, err := application.New(
 				append(commonOpts,
-					config.WithContext(c),
-					config.WithGeneratedContentDir(tmpdir),
-					config.WithSystemState(systemState),
-					config.WithGalleries(galleries),
-				)...,
-			)
-			Expect(err).ToNot(HaveOccurred())
-			app, err = API(application)
-			Expect(err).ToNot(HaveOccurred())
-
-			go func() {
-				if err := app.Start("127.0.0.1:9090"); err != nil && err != http.ErrServerClosed {
-					xlog.Error("server error", "error", err)
-				}
-			}()
-
-			defaultConfig := openai.DefaultConfig("")
-			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
-
-			client2 = openaigo.NewClient("")
-			client2.BaseURL = defaultConfig.BaseURL
-
-			// Wait for API to be ready
-			client = openai.NewClientWithConfig(defaultConfig)
-			Eventually(func() error {
-				_, err := client.ListModels(context.TODO())
-				return err
-			}, "2m").ShouldNot(HaveOccurred())
-		})
-
-		AfterEach(func() {
-			cancel()
-			if app != nil {
-				ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				err := app.Shutdown(ctx)
-				Expect(err).ToNot(HaveOccurred())
-			}
-			err := os.RemoveAll(tmpdir)
-			Expect(err).ToNot(HaveOccurred())
-			_, err = os.ReadDir(tmpdir)
-			Expect(err).To(HaveOccurred())
-		})
-
-		It("runs gguf models (chat)", Label("llama-gguf"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			modelName := "qwen3-1.7b"
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID: "localai@" + modelName,
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				return response["processed"].(bool)
-			}, "900s", "10s").Should(Equal(true))
-
-			By("testing chat")
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
-				{
-					Role:    "user",
-					Content: "How much is 2+2?",
-				},
-			}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")))
-
-			By("testing functions")
-			resp2, err := client.CreateChatCompletion(
-				context.TODO(),
-				openai.ChatCompletionRequest{
-					Model: modelName,
-					Messages: []openai.ChatCompletionMessage{
-						{
-							Role:    "user",
-							Content: "What is the weather like in San Francisco (celsius)?",
-						},
-					},
-					Functions: []openai.FunctionDefinition{
-						openai.FunctionDefinition{
-							Name:        "get_current_weather",
-							Description: "Get the current weather",
-							Parameters: jsonschema.Definition{
-								Type: jsonschema.Object,
-								Properties: map[string]jsonschema.Definition{
-									"location": {
-										Type:        jsonschema.String,
-										Description: "The city and state, e.g. San Francisco, CA",
-									},
-									"unit": {
-										Type: jsonschema.String,
-										Enum: []string{"celcius", "fahrenheit"},
-									},
-								},
-								Required: []string{"location"},
-							},
-						},
-					},
-				})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp2.Choices)).To(Equal(1))
-			Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
-			Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
-
-			var res map[string]string
-			err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
-			Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
-			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
-		})
-
-		It("installs and is capable to run tts", Label("tts"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID: "localai@voice-en-us-kathleen-low",
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				fmt.Println(response)
-				return response["processed"].(bool)
-			}, "360s", "10s").Should(Equal(true))
-
-			// An HTTP Post to the /tts endpoint should return a wav audio file
-			resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "voice-en-us-kathleen-low"}`)))
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-			dat, err := io.ReadAll(resp.Body)
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-
-			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
-			Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/wav"), Equal("audio/vnd.wave")))
-		})
-		It("installs and is capable to generate images", Label("stablediffusion"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID:   "localai@sd-1.5-ggml",
-				Name: "stablediffusion",
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				fmt.Println(response)
-				return response["processed"].(bool)
-			}, "1200s", "10s").Should(Equal(true))
-
-			resp, err := http.Post(
-				"http://127.0.0.1:9090/v1/images/generations",
-				"application/json",
-				bytes.NewBuffer([]byte(`{
-					 			"prompt": "a lovely cat",
-								"step": 1,  "seed":9000,
-					 			"size": "256x256", "n":2}`)))
-			// The response should contain an URL
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
-			dat, err := io.ReadAll(resp.Body)
-			Expect(err).ToNot(HaveOccurred(), "error reading /image/generations response")
-
-			imgUrlResp := &schema.OpenAIResponse{}
-			err = json.Unmarshal(dat, imgUrlResp)
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
-			Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
-			imgUrl := imgUrlResp.Data[0].URL
-			Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)
-			Expect(imgUrl).To(ContainSubstring(".png"), imgUrl)
-
-			imgResp, err := http.Get(imgUrl)
-			Expect(err).To(BeNil())
-			Expect(imgResp).ToNot(BeNil())
-			Expect(imgResp.StatusCode).To(Equal(200))
-			Expect(imgResp.ContentLength).To(BeNumerically(">", 0))
-			imgData := make([]byte, 512)
-			count, err := io.ReadFull(imgResp.Body, imgData)
-			Expect(err).To(Or(BeNil(), MatchError(io.EOF)))
-			Expect(count).To(BeNumerically(">", 0))
-			Expect(count).To(BeNumerically("<=", 512))
-			Expect(http.DetectContentType(imgData)).To(Equal("image/png"))
-		})
-	})
-
-	Context("API query", func() {
-		BeforeEach(func() {
-			modelPath := os.Getenv("MODELS_PATH")
-			backendPath := os.Getenv("BACKENDS_PATH")
-			c, cancel = context.WithCancel(context.Background())
-
-			var err error
-
-			systemState, err := system.GetSystemState(
-				system.WithBackendPath(backendPath),
-				system.WithModelPath(modelPath),
-			)
-			Expect(err).ToNot(HaveOccurred())
-
-			application, err := application.New(
-				append(commonOpts,
-					config.WithExternalBackend("transformers", os.Getenv("TRANSFORMER_BACKEND")),
 					config.WithContext(c),
 					config.WithSystemState(systemState),
 				)...)
 			Expect(err).ToNot(HaveOccurred())
+			application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
 			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())
 			go func() {
@@ -952,149 +736,12 @@ parameters:
 				err := app.Shutdown(ctx)
 				Expect(err).ToNot(HaveOccurred())
 			}
+			Expect(os.RemoveAll(tmpdir)).To(Succeed())
 		})
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(BeNumerically(">=", 7))
-		})
-		It("can generate completions via ggml", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-		It("can generate chat completions via ggml", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-
-		It("does not duplicate the first content token in streaming chat completions", Label("llama-gguf", "llama-gguf-stream"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{
-				Model:    "testmodel.ggml",
-				Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}},
-			})
-			Expect(err).ToNot(HaveOccurred())
-			defer stream.Close()
-
-			var contentParts []string
-			for {
-				chunk, err := stream.Recv()
-				if err == io.EOF {
-					break
-				}
-				Expect(err).ToNot(HaveOccurred())
-				if len(chunk.Choices) > 0 {
-					delta := chunk.Choices[0].Delta.Content
-					if delta != "" {
-						contentParts = append(contentParts, delta)
-					}
-				}
-			}
-
-			Expect(contentParts).ToNot(BeEmpty(), "Expected streaming content tokens")
-			// The first content token should appear exactly once.
-			// A bug in grpc-server.cpp caused the role-init array element
-			// to get the same ChatDelta stamped, duplicating the first token.
-			if len(contentParts) >= 2 {
-				Expect(contentParts[0]).ToNot(Equal(contentParts[1]),
-					"First content token was duplicated: %v", contentParts[:2])
-			}
-		})
-
-		It("returns logprobs in chat completions when requested", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test only on linux")
-			}
-			topLogprobsVal := 3
-			response, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{
-				Model:       "testmodel.ggml",
-				LogProbs:    true,
-				TopLogProbs: topLogprobsVal,
-				Messages:    []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
-			Expect(err).ToNot(HaveOccurred())
-
-			Expect(len(response.Choices)).To(Equal(1))
-			Expect(response.Choices[0].Message).ToNot(BeNil())
-			Expect(response.Choices[0].Message.Content).ToNot(BeEmpty())
-
-			// Verify logprobs are present and have correct structure
-			Expect(response.Choices[0].LogProbs).ToNot(BeNil())
-			Expect(response.Choices[0].LogProbs.Content).ToNot(BeEmpty())
-
-			Expect(len(response.Choices[0].LogProbs.Content)).To(BeNumerically(">", 1))
-
-			foundatLeastToken := ""
-			foundAtLeastBytes := []byte{}
-			foundAtLeastTopLogprobBytes := []byte{}
-			foundatLeastTopLogprob := ""
-			// Verify logprobs content structure matches OpenAI format
-			for _, logprobContent := range response.Choices[0].LogProbs.Content {
-				// Bytes can be empty for certain tokens (special tokens, etc.), so we don't require it
-				if len(logprobContent.Bytes) > 0 {
-					foundAtLeastBytes = logprobContent.Bytes
-				}
-				if len(logprobContent.Token) > 0 {
-					foundatLeastToken = logprobContent.Token
-				}
-				Expect(logprobContent.LogProb).To(BeNumerically("<=", 0)) // Logprobs are always <= 0
-				Expect(len(logprobContent.TopLogProbs)).To(BeNumerically(">", 1))
-
-				// If top_logprobs is requested, verify top_logprobs array respects the limit
-				if len(logprobContent.TopLogProbs) > 0 {
-					// Should respect top_logprobs limit (3 in this test)
-					Expect(len(logprobContent.TopLogProbs)).To(BeNumerically("<=", topLogprobsVal))
-					for _, topLogprob := range logprobContent.TopLogProbs {
-						if len(topLogprob.Bytes) > 0 {
-							foundAtLeastTopLogprobBytes = topLogprob.Bytes
-						}
-						if len(topLogprob.Token) > 0 {
-							foundatLeastTopLogprob = topLogprob.Token
-						}
-						Expect(topLogprob.LogProb).To(BeNumerically("<=", 0))
-					}
-				}
-			}
-
-			Expect(foundAtLeastBytes).ToNot(BeEmpty())
-			Expect(foundAtLeastTopLogprobBytes).ToNot(BeEmpty())
-			Expect(foundatLeastToken).ToNot(BeEmpty())
-			Expect(foundatLeastTopLogprob).ToNot(BeEmpty())
-		})
-
-		It("applies logit_bias to chat completions when requested", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test only on linux")
-			}
-			// logit_bias is a map of token IDs (as strings) to bias values (-100 to 100)
-			// According to OpenAI API: modifies the likelihood of specified tokens appearing in the completion
-			logitBias := map[string]int{
-				"15043": 1, // Bias token ID 15043 (example token ID) with bias value 1
-			}
-			response, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{
-				Model:     "testmodel.ggml",
-				Messages:  []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}},
-				LogitBias: logitBias,
-			})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(response.Choices)).To(Equal(1))
-			Expect(response.Choices[0].Message).ToNot(BeNil())
-			Expect(response.Choices[0].Message.Content).ToNot(BeEmpty())
-			// If logit_bias is applied, the response should be generated successfully
-			// We can't easily verify the bias effect without knowing the actual token IDs for the model,
-			// but the fact that the request succeeds confirms the API accepts and processes logit_bias
+			Expect(len(models.Models)).To(BeNumerically(">=", 1))
 		})

 		It("returns errors", func() {
@@ -1103,331 +750,193 @@ parameters:
 			Expect(err.Error()).To(ContainSubstring("error, status code: 404, status: 404 Not Found"))
 		})

-		It("shows the external backend", func() {
-			// Only run on linux
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			// do an http request to the /system endpoint
+		It("shows the external backend on /system", func() {
+			// /system reports the backends available to the application.
+			// Mock-backend is registered via SetExternalBackend so it appears
+			// alongside any built-in entries; verifying that string proves the
+			// endpoint is wired up regardless of which real backends exist.
 			resp, err := http.Get("http://127.0.0.1:9090/system")
 			Expect(err).ToNot(HaveOccurred())
 			Expect(resp.StatusCode).To(Equal(200))
 			dat, err := io.ReadAll(resp.Body)
 			Expect(err).ToNot(HaveOccurred())
-			Expect(string(dat)).To(ContainSubstring("llama-cpp"))
+			Expect(string(dat)).To(ContainSubstring("mock-backend"))
 		})

-		It("transcribes audio", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			resp, err := client.CreateTranscription(
-				context.Background(),
-				openai.AudioRequest{
-					Model:    openai.Whisper1,
-					FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
-		})
-
-		It("calculate embeddings", func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-			embeddingModel := openai.AdaEmbeddingV2
-			resp, err := client.CreateEmbeddings(
-				context.Background(),
-				openai.EmbeddingRequest{
-					Model: embeddingModel,
-					Input: []string{"sun", "cat"},
-				},
-			)
-			Expect(err).ToNot(HaveOccurred(), err)
-			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 4096))
-			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 4096))
-
-			sunEmbedding := resp.Data[0].Embedding
-			resp2, err := client.CreateEmbeddings(
-				context.Background(),
-				openai.EmbeddingRequest{
-					Model: embeddingModel,
-					Input: []string{"sun"},
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
-			Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
-
-			resp3, err := client.CreateEmbeddings(
-				context.Background(),
-				openai.EmbeddingRequest{
-					Model: embeddingModel,
-					Input: []string{"cat"},
-				},
-			)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(resp3.Data[0].Embedding).To(Equal(resp.Data[1].Embedding))
-			Expect(resp3.Data[0].Embedding).ToNot(Equal(sunEmbedding))
-		})
-
-		Context("External gRPC calls", func() {
-			It("calculate embeddings with sentencetransformers", func() {
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
+		// Agent Jobs: HTTP API for task/job scheduling. The underlying AgentPool
+		// service is exercised in core/services/agentpool/agent_jobs_test.go;
+		// these specs cover the /api/agent/* HTTP plumbing on top.
+		Context("Agent Jobs", func() {
+			It("creates and manages tasks", func() {
+				// Create a task
+				taskBody := map[string]any{
+					"name":        "Test Task",
+					"description": "Test Description",
+					"model":       "mock-model",
+					"prompt":      "Hello {{.name}}",
+					"enabled":     true,
 				}
-				resp, err := client.CreateEmbeddings(
-					context.Background(),
-					openai.EmbeddingRequest{
-						Model: openai.AdaCodeSearchCode,
-						Input: []string{"sun", "cat"},
-					},
-				)
+
+				var createResp map[string]any
+				err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
-				Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
+				Expect(createResp["id"]).ToNot(BeEmpty())
+				taskID := createResp["id"].(string)

-				sunEmbedding := resp.Data[0].Embedding
-				resp2, err := client.CreateEmbeddings(
-					context.Background(),
-					openai.EmbeddingRequest{
-						Model: openai.AdaCodeSearchCode,
-						Input: []string{"sun"},
-					},
-				)
+				// Get the task
+				var task schema.Task
+				resp, err := http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
-				Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
-			})
-		})
+				Expect(resp.StatusCode).To(Equal(200))
+				body, _ := io.ReadAll(resp.Body)
+				json.Unmarshal(body, &task)
+				Expect(task.Name).To(Equal("Test Task"))

-		// See tests/integration/stores_test
-		Context("Stores", Label("stores"), func() {
+				// List tasks
+				resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks")
+				Expect(err).ToNot(HaveOccurred())
+				Expect(resp.StatusCode).To(Equal(200))
+				var tasks []schema.Task
+				body, _ = io.ReadAll(resp.Body)
+				json.Unmarshal(body, &tasks)
+				Expect(len(tasks)).To(BeNumerically(">=", 1))

-			BeforeEach(func() {
-				// Only run on linux
-				if runtime.GOOS != "linux" {
-					Skip("test supported only on linux")
-				}
+				// Update task
+				taskBody["name"] = "Updated Task"
+				err = putRequestJSON("http://127.0.0.1:9090/api/agent/tasks/"+taskID, &taskBody)
+				Expect(err).ToNot(HaveOccurred())
+
+				// Verify update
+				resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID)
+				Expect(err).ToNot(HaveOccurred())
+				body, _ = io.ReadAll(resp.Body)
+				json.Unmarshal(body, &task)
+				Expect(task.Name).To(Equal("Updated Task"))
+
+				// Delete task
+				req, _ := http.NewRequest("DELETE", "http://127.0.0.1:9090/api/agent/tasks/"+taskID, nil)
+				req.Header.Set("Authorization", bearerKey)
+				resp, err = http.DefaultClient.Do(req)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(resp.StatusCode).To(Equal(200))
 			})

-			It("sets, gets, finds and deletes entries", func() {
-				ks := [][]float32{
-					{0.1, 0.2, 0.3},
-					{0.4, 0.5, 0.6},
-					{0.7, 0.8, 0.9},
-				}
-				vs := []string{
-					"test1",
-					"test2",
-					"test3",
-				}
-				setBody := schema.StoresSet{
-					Keys:   ks,
-					Values: vs,
+			It("executes and monitors jobs", func() {
+				// Create a task first
+				taskBody := map[string]any{
+					"name":    "Job Test Task",
+					"model":   "mock-model",
+					"prompt":  "Say hello",
+					"enabled": true,
 				}

-				url := "http://127.0.0.1:9090/stores/"
-				err := postRequestJSON(url+"set", &setBody)
+				var createResp map[string]any
+				err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
 				Expect(err).ToNot(HaveOccurred())
+				taskID := createResp["id"].(string)

-				getBody := schema.StoresGet{
-					Keys: ks,
+				// Execute a job
+				jobBody := map[string]any{
+					"task_id":    taskID,
+					"parameters": map[string]string{},
 				}
-				var getRespBody schema.StoresGetResponse
-				err = postRequestResponseJSON(url+"get", &getBody, &getRespBody)
+
+				var jobResp schema.JobExecutionResponse
+				err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/jobs/execute", &jobBody, &jobResp)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(len(getRespBody.Keys)).To(Equal(len(ks)))
+				Expect(jobResp.JobID).ToNot(BeEmpty())
+				jobID := jobResp.JobID

-				for i, v := range getRespBody.Keys {
-					if v[0] == 0.1 {
-						Expect(getRespBody.Values[i]).To(Equal("test1"))
-					} else if v[0] == 0.4 {
-						Expect(getRespBody.Values[i]).To(Equal("test2"))
-					} else {
-						Expect(getRespBody.Values[i]).To(Equal("test3"))
-					}
-				}
-
-				deleteBody := schema.StoresDelete{
-					Keys: [][]float32{
-						{0.1, 0.2, 0.3},
-					},
-				}
-				err = postRequestJSON(url+"delete", &deleteBody)
+				// Get job status
+				var job schema.Job
+				resp, err := http.Get("http://127.0.0.1:9090/api/agent/jobs/" + jobID)
 				Expect(err).ToNot(HaveOccurred())
+				Expect(resp.StatusCode).To(Equal(200))
+				body, _ := io.ReadAll(resp.Body)
+				json.Unmarshal(body, &job)
+				Expect(job.ID).To(Equal(jobID))
+				Expect(job.TaskID).To(Equal(taskID))

-				findBody := schema.StoresFind{
-					Key:  []float32{0.1, 0.3, 0.7},
-					Topk: 10,
-				}
-
-				var findRespBody schema.StoresFindResponse
-				err = postRequestResponseJSON(url+"find", &findBody, &findRespBody)
+				// List jobs
+				resp, err = http.Get("http://127.0.0.1:9090/api/agent/jobs")
 				Expect(err).ToNot(HaveOccurred())
-				Expect(len(findRespBody.Keys)).To(Equal(2))
+				Expect(resp.StatusCode).To(Equal(200))
+				var jobs []schema.Job
+				body, _ = io.ReadAll(resp.Body)
+				json.Unmarshal(body, &jobs)
+				Expect(len(jobs)).To(BeNumerically(">=", 1))

-				for i, v := range findRespBody.Keys {
-					if v[0] == 0.4 {
-						Expect(findRespBody.Values[i]).To(Equal("test2"))
-					} else {
-						Expect(findRespBody.Values[i]).To(Equal("test3"))
-					}
-
-					Expect(findRespBody.Similarities[i]).To(BeNumerically(">=", -1))
-					Expect(findRespBody.Similarities[i]).To(BeNumerically("<=", 1))
-				}
-			})
-
-			Context("Agent Jobs", Label("agent-jobs"), func() {
-				It("creates and manages tasks", func() {
-					// Create a task
-					taskBody := map[string]any{
-						"name":        "Test Task",
-						"description": "Test Description",
-						"model":       "testmodel.ggml",
-						"prompt":      "Hello {{.name}}",
-						"enabled":     true,
-					}
-
-					var createResp map[string]any
-					err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
-					Expect(err).ToNot(HaveOccurred())
-					Expect(createResp["id"]).ToNot(BeEmpty())
-					taskID := createResp["id"].(string)
-
-					// Get the task
-					var task schema.Task
-					resp, err := http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID)
-					Expect(err).ToNot(HaveOccurred())
-					Expect(resp.StatusCode).To(Equal(200))
-					body, _ := io.ReadAll(resp.Body)
-					json.Unmarshal(body, &task)
-					Expect(task.Name).To(Equal("Test Task"))
-
-					// List tasks
-					resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks")
-					Expect(err).ToNot(HaveOccurred())
-					Expect(resp.StatusCode).To(Equal(200))
-					var tasks []schema.Task
-					body, _ = io.ReadAll(resp.Body)
-					json.Unmarshal(body, &tasks)
-					Expect(len(tasks)).To(BeNumerically(">=", 1))
-
-					// Update task
-					taskBody["name"] = "Updated Task"
-					err = putRequestJSON("http://127.0.0.1:9090/api/agent/tasks/"+taskID, &taskBody)
-					Expect(err).ToNot(HaveOccurred())
-
-					// Verify update
-					resp, err = http.Get("http://127.0.0.1:9090/api/agent/tasks/" + taskID)
-					Expect(err).ToNot(HaveOccurred())
-					body, _ = io.ReadAll(resp.Body)
-					json.Unmarshal(body, &task)
-					Expect(task.Name).To(Equal("Updated Task"))
-
-					// Delete task
-					req, _ := http.NewRequest("DELETE", "http://127.0.0.1:9090/api/agent/tasks/"+taskID, nil)
+				// Cancel job (if still pending/running)
+				if job.Status == schema.JobStatusPending || job.Status == schema.JobStatusRunning {
+					req, _ := http.NewRequest("POST", "http://127.0.0.1:9090/api/agent/jobs/"+jobID+"/cancel", nil)
 					req.Header.Set("Authorization", bearerKey)
 					resp, err = http.DefaultClient.Do(req)
 					Expect(err).ToNot(HaveOccurred())
 					Expect(resp.StatusCode).To(Equal(200))
-				})
+				}
+			})

-				It("executes and monitors jobs", func() {
-					// Create a task first
-					taskBody := map[string]any{
-						"name":    "Job Test Task",
-						"model":   "testmodel.ggml",
-						"prompt":  "Say hello",
-						"enabled": true,
-					}
+			It("executes task by name", func() {
+				// Create a task with a specific name
+				taskBody := map[string]any{
+					"name":    "Named Task",
+					"model":   "mock-model",
+					"prompt":  "Hello",
+					"enabled": true,
+				}

-					var createResp map[string]any
-					err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
-					Expect(err).ToNot(HaveOccurred())
-					taskID := createResp["id"].(string)
+				var createResp map[string]any
+				err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
+				Expect(err).ToNot(HaveOccurred())

-					// Execute a job
-					jobBody := map[string]any{
-						"task_id":    taskID,
-						"parameters": map[string]string{},
-					}
-
-					var jobResp schema.JobExecutionResponse
-					err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/jobs/execute", &jobBody, &jobResp)
-					Expect(err).ToNot(HaveOccurred())
-					Expect(jobResp.JobID).ToNot(BeEmpty())
-					jobID := jobResp.JobID
-
-					// Get job status
-					var job schema.Job
-					resp, err := http.Get("http://127.0.0.1:9090/api/agent/jobs/" + jobID)
-					Expect(err).ToNot(HaveOccurred())
-					Expect(resp.StatusCode).To(Equal(200))
-					body, _ := io.ReadAll(resp.Body)
-					json.Unmarshal(body, &job)
-					Expect(job.ID).To(Equal(jobID))
-					Expect(job.TaskID).To(Equal(taskID))
-
-					// List jobs
-					resp, err = http.Get("http://127.0.0.1:9090/api/agent/jobs")
-					Expect(err).ToNot(HaveOccurred())
-					Expect(resp.StatusCode).To(Equal(200))
-					var jobs []schema.Job
-					body, _ = io.ReadAll(resp.Body)
-					json.Unmarshal(body, &jobs)
-					Expect(len(jobs)).To(BeNumerically(">=", 1))
-
-					// Cancel job (if still pending/running)
-					if job.Status == schema.JobStatusPending || job.Status == schema.JobStatusRunning {
-						req, _ := http.NewRequest("POST", "http://127.0.0.1:9090/api/agent/jobs/"+jobID+"/cancel", nil)
-						req.Header.Set("Authorization", bearerKey)
-						resp, err = http.DefaultClient.Do(req)
-						Expect(err).ToNot(HaveOccurred())
-						Expect(resp.StatusCode).To(Equal(200))
-					}
-				})
-
-				It("executes task by name", func() {
-					// Create a task with a specific name
-					taskBody := map[string]any{
-						"name":    "Named Task",
-						"model":   "testmodel.ggml",
-						"prompt":  "Hello",
-						"enabled": true,
-					}
-
-					var createResp map[string]any
-					err := postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks", &taskBody, &createResp)
-					Expect(err).ToNot(HaveOccurred())
-
-					// Execute by name
-					paramsBody := map[string]string{"param1": "value1"}
-					var jobResp schema.JobExecutionResponse
-					err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks/Named Task/execute", &paramsBody, &jobResp)
-					Expect(err).ToNot(HaveOccurred())
-					Expect(jobResp.JobID).ToNot(BeEmpty())
-				})
+				// Execute by name
+				paramsBody := map[string]string{"param1": "value1"}
+				var jobResp schema.JobExecutionResponse
+				err = postRequestResponseJSON("http://127.0.0.1:9090/api/agent/tasks/Named Task/execute", &paramsBody, &jobResp)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(jobResp.JobID).ToNot(BeEmpty())
 			})
 		})
 	})

+	// Config file Context: exercises the path where models are loaded from a
+	// single multi-entry YAML (config_file option) rather than per-model YAMLs
+	// in the model dir. The fixtures point at mock-backend so this is a
+	// plumbing test for config-file loading and routing, not a real-inference
+	// test.
 	Context("Config file", func() {
 		BeforeEach(func() {
-			if runtime.GOOS != "linux" {
-				Skip("run this test only on linux")
+			if mockBackendPath == "" {
+				Skip("mock-backend binary not built; run 'make build-mock-backend'")
 			}
-			modelPath := os.Getenv("MODELS_PATH")
-			backendPath := os.Getenv("BACKENDS_PATH")
 			c, cancel = context.WithCancel(context.Background())

 			var err error
+			tmpdir, err = os.MkdirTemp("", "")
+			Expect(err).ToNot(HaveOccurred())
+			modelDir = filepath.Join(tmpdir, "models")
+			Expect(os.Mkdir(modelDir, 0750)).To(Succeed())
+
+			// Inline config file with two list entries that both resolve to mock-backend.
+			// Mirrors the legacy testmodel.ggml shape so the test still proves that
+			// config-file loading registers each entry as a routable model.
+			configContent := `- name: list1
+  parameters:
+    model: mock-model.bin
+  backend: mock-backend
+  context_size: 200
+- name: list2
+  parameters:
+    model: mock-model.bin
+  backend: mock-backend
+  context_size: 200
+`
+			configFile := filepath.Join(tmpdir, "config.yaml")
+			Expect(os.WriteFile(configFile, []byte(configContent), 0644)).To(Succeed())

 			systemState, err := system.GetSystemState(
-				system.WithBackendPath(backendPath),
-				system.WithModelPath(modelPath),
+				system.WithBackendPath(backendDir),
+				system.WithModelPath(modelDir),
 			)
 			Expect(err).ToNot(HaveOccurred())

@@ -1435,9 +944,10 @@ parameters:
 				append(commonOpts,
 					config.WithContext(c),
 					config.WithSystemState(systemState),
-					config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
+					config.WithConfigFile(configFile))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
+			application.ModelLoader().SetExternalBackend("mock-backend", mockBackendPath)
 			app, err = API(application)
 			Expect(err).ToNot(HaveOccurred())

@@ -1466,6 +976,7 @@ parameters:
 				err := app.Shutdown(ctx)
 				Expect(err).ToNot(HaveOccurred())
 			}
+			Expect(os.RemoveAll(tmpdir)).To(Succeed())
 		})
 		It("can generate chat completions from config file (list1)", func() {
 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
--- a/core/http/http_suite_test.go
+++ b/core/http/http_suite_test.go
@@ -12,8 +12,41 @@ import (
 var (
 	tmpdir   string
 	modelDir string
+
+	// Backend directory and the mock-backend binary path. Resolved by
+	// findMockBackendBinary() from the same prebuilt artifact that
+	// tests/e2e uses, so the suite no longer needs llama-cpp / transformers
+	// / whisper / piper / stablediffusion-ggml builds.
+	backendDir      string
+	mockBackendPath string
 )

+// findMockBackendBinary locates the mock-backend binary built by
+// `make build-mock-backend`. Mirrors the lookup used by
+// tests/e2e/e2e_suite_test.go so both suites consume the same artifact.
+func findMockBackendBinary() (string, bool) {
+	candidates := []string{
+		filepath.Join("..", "..", "tests", "e2e", "mock-backend", "mock-backend"),
+		filepath.Join("tests", "e2e", "mock-backend", "mock-backend"),
+		filepath.Join("..", "e2e", "mock-backend", "mock-backend"),
+	}
+	if wd, err := os.Getwd(); err == nil {
+		candidates = append(candidates,
+			filepath.Join(wd, "..", "..", "tests", "e2e", "mock-backend", "mock-backend"),
+		)
+	}
+	for _, p := range candidates {
+		if info, err := os.Stat(p); err == nil && !info.IsDir() {
+			abs, absErr := filepath.Abs(p)
+			if absErr == nil {
+				return abs, true
+			}
+			return p, true
+		}
+	}
+	return "", false
+}
+
 func TestLocalAI(t *testing.T) {
 	RegisterFailHandler(Fail)

@@ -24,6 +57,15 @@ func TestLocalAI(t *testing.T) {
 	err = os.Mkdir(modelDir, 0750)
 	Expect(err).ToNot(HaveOccurred())

+	backendDir = filepath.Join(tmpdir, "backends")
+	Expect(os.Mkdir(backendDir, 0750)).To(Succeed())
+
+	if p, ok := findMockBackendBinary(); ok {
+		mockBackendPath = p
+		// Make sure it's executable for the path the suite owns.
+		_ = os.Chmod(mockBackendPath, 0755)
+	}
+
 	AfterSuite(func() {
 		err := os.RemoveAll(tmpdir)
 		Expect(err).ToNot(HaveOccurred())