mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
test(e2e-backends): add tools capability + HF model name support
Extends tests/e2e-backends to cover backends that:
- Resolve HuggingFace model ids natively (vllm, vllm-omni) instead of
loading a local file: BACKEND_TEST_MODEL_NAME is passed verbatim as
ModelOptions.Model with no download/ModelFile.
- Parse tool calls into ChatDelta.tool_calls: new "tools" capability
sends a Predict with a get_weather function definition and asserts
the Reply contains a matching ToolCallDelta. Uses UseTokenizerTemplate
with OpenAI-style Messages so the backend can wire tools into the
model's chat template.
- Need backend-specific Options[]: BACKEND_TEST_OPTIONS lets a test set
e.g. "tool_parser:hermes,reasoning_parser:qwen3" at LoadModel time.
Adds make target test-extra-backend-vllm that:
- docker-build-vllm
- loads Qwen/Qwen2.5-0.5B-Instruct
- runs health,load,predict,stream,tools with tool_parser:hermes
Drops backend/python/vllm/test_{cpu_inference,tool_calls}.py — those
standalone scripts were scaffolding used while bringing up the Python
backend; the e2e-backends harness now covers the same ground uniformly
alongside llama-cpp and ik-llama-cpp.
This commit is contained in:
21
Makefile
21
Makefile
@@ -466,8 +466,14 @@ test-extra: prepare-test-extra
|
||||
## BACKEND_IMAGE Required. Docker image to test, e.g. local-ai-backend:llama-cpp.
|
||||
## BACKEND_TEST_MODEL_URL URL of a model file to download and load.
|
||||
## BACKEND_TEST_MODEL_FILE Path to an already-downloaded model (skips download).
|
||||
## BACKEND_TEST_MODEL_NAME HuggingFace repo id (e.g. Qwen/Qwen2.5-0.5B-Instruct).
|
||||
## Use this instead of MODEL_URL for backends that
|
||||
## resolve HF model ids natively (vllm, vllm-omni).
|
||||
## BACKEND_TEST_CAPS Comma-separated capabilities, default "health,load,predict,stream".
|
||||
## Adds "tools" to exercise ChatDelta tool call extraction.
|
||||
## BACKEND_TEST_PROMPT Override the prompt used in predict/stream specs.
|
||||
## BACKEND_TEST_OPTIONS Comma-separated Options[] entries forwarded to LoadModel,
|
||||
## e.g. "tool_parser:hermes,reasoning_parser:qwen3".
|
||||
##
|
||||
## Direct usage (image already built, no docker-build-* dependency):
|
||||
##
|
||||
@@ -486,9 +492,13 @@ test-extra-backend: protogen-go
|
||||
BACKEND_IMAGE="$$BACKEND_IMAGE" \
|
||||
BACKEND_TEST_MODEL_URL="$${BACKEND_TEST_MODEL_URL:-$(BACKEND_TEST_MODEL_URL)}" \
|
||||
BACKEND_TEST_MODEL_FILE="$$BACKEND_TEST_MODEL_FILE" \
|
||||
BACKEND_TEST_MODEL_NAME="$$BACKEND_TEST_MODEL_NAME" \
|
||||
BACKEND_TEST_CAPS="$$BACKEND_TEST_CAPS" \
|
||||
BACKEND_TEST_PROMPT="$$BACKEND_TEST_PROMPT" \
|
||||
go test -v -timeout 15m ./tests/e2e-backends/...
|
||||
BACKEND_TEST_OPTIONS="$$BACKEND_TEST_OPTIONS" \
|
||||
BACKEND_TEST_TOOL_PROMPT="$$BACKEND_TEST_TOOL_PROMPT" \
|
||||
BACKEND_TEST_TOOL_NAME="$$BACKEND_TEST_TOOL_NAME" \
|
||||
go test -v -timeout 30m ./tests/e2e-backends/...
|
||||
|
||||
## Convenience wrappers: build the image, then exercise it.
|
||||
test-extra-backend-llama-cpp: docker-build-llama-cpp
|
||||
@@ -497,6 +507,15 @@ test-extra-backend-llama-cpp: docker-build-llama-cpp
|
||||
test-extra-backend-ik-llama-cpp: docker-build-ik-llama-cpp
|
||||
BACKEND_IMAGE=local-ai-backend:ik-llama-cpp $(MAKE) test-extra-backend
|
||||
|
||||
## vllm is resolved from a HuggingFace model id (no file download) and
## exercises Predict + streaming + tool-call extraction via the hermes parser.
## NOTE: the env vars below are consumed by the generic test-extra-backend
## target, which forwards them to the Go suite in tests/e2e-backends.
test-extra-backend-vllm: docker-build-vllm
	BACKEND_IMAGE=local-ai-backend:vllm \
	BACKEND_TEST_MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct \
	BACKEND_TEST_CAPS=health,load,predict,stream,tools \
	BACKEND_TEST_OPTIONS=tool_parser:hermes \
	$(MAKE) test-extra-backend
|
||||
|
||||
DOCKER_IMAGE?=local-ai
|
||||
IMAGE_TYPE?=core
|
||||
BASE_IMAGE?=ubuntu:24.04
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env python3
"""End-to-end CPU inference smoke test for the vllm backend.

Spawns the gRPC backend server, loads a small Qwen model, runs Predict,
TokenizeString, and Free, and verifies non-empty output.

Usage:
    python test_cpu_inference.py [--model MODEL_ID] [--addr HOST:PORT]

Defaults to Qwen/Qwen2.5-0.5B-Instruct (Qwen3.5-0.6B is not yet published
on the HuggingFace hub at the time of writing).
"""
import argparse
import os
import subprocess
import sys
import time

import grpc

# Make sibling backend_pb2 importable
HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, HERE)

import backend_pb2
import backend_pb2_grpc


def _wait_for_channel(addr: str, timeout: float = 30.0):
    """Poll *addr* until a gRPC channel becomes ready and return it.

    Raises RuntimeError if the server does not become ready within
    ``timeout`` seconds. Each failed probe closes its channel before
    retrying so sockets are not leaked.

    Bug fixed vs. the previous inline loop: ``channel`` was assigned
    *before* ``channel_ready_future(...).result()`` could raise, so after
    the deadline ``channel`` was always non-None and the
    "did not start in time" error was unreachable.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        channel = grpc.insecure_channel(addr)
        try:
            grpc.channel_ready_future(channel).result(timeout=2)
            return channel
        except Exception:
            channel.close()
            time.sleep(0.5)
    raise RuntimeError("backend server did not start in time")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=os.environ.get("TEST_MODEL", "Qwen/Qwen2.5-0.5B-Instruct"))
    parser.add_argument("--addr", default="127.0.0.1:50099")
    parser.add_argument("--prompt", default="Hello, how are you?")
    args = parser.parse_args()

    # Force CPU mode for vLLM
    env = os.environ.copy()
    env.setdefault("VLLM_TARGET_DEVICE", "cpu")
    env.setdefault("VLLM_CPU_KVCACHE_SPACE", "4")

    server_proc = subprocess.Popen(
        [sys.executable, os.path.join(HERE, "backend.py"), "--addr", args.addr],
        env=env,
        stdout=sys.stdout,
        stderr=sys.stderr,
    )

    try:
        # Wait for the server to come up (raises if it never does).
        channel = _wait_for_channel(args.addr)
        stub = backend_pb2_grpc.BackendStub(channel)

        print(f"[test] LoadModel({args.model})", flush=True)
        load_resp = stub.LoadModel(backend_pb2.ModelOptions(
            Model=args.model,
            ContextSize=2048,
        ), timeout=900)
        assert load_resp.success, f"LoadModel failed: {load_resp.message}"

        print(f"[test] Predict prompt={args.prompt!r}", flush=True)
        reply = stub.Predict(backend_pb2.PredictOptions(
            Prompt=args.prompt,
            Tokens=64,
            Temperature=0.7,
            TopP=0.9,
        ), timeout=600)
        text = reply.message.decode("utf-8")
        print(f"[test] Predict output: {text!r}", flush=True)
        assert text.strip(), "Predict returned empty text"

        print("[test] TokenizeString", flush=True)
        tok_resp = stub.TokenizeString(backend_pb2.PredictOptions(Prompt="hello world"), timeout=30)
        print(f"[test] TokenizeString length={tok_resp.length}", flush=True)
        assert tok_resp.length > 0

        print("[test] Free", flush=True)
        free_resp = stub.Free(backend_pb2.MemoryUsageData(), timeout=30)
        assert free_resp.success, f"Free failed: {free_resp.message}"

        print("[test] PASS", flush=True)
    finally:
        # Always tear the server down, escalating to SIGKILL if needed.
        server_proc.terminate()
        try:
            server_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            server_proc.kill()


if __name__ == "__main__":
    main()
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
"""End-to-end CPU tool-calling test for the vllm backend.

Loads Qwen2.5-0.5B-Instruct with the hermes tool parser, sends a chat
completion with a `get_weather` tool, and checks that the reply's
ChatDelta contains a ToolCallDelta for that function.
"""
import argparse
import json
import os
import subprocess
import sys
import time

import grpc

# Make sibling backend_pb2 importable
HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, HERE)

import backend_pb2
import backend_pb2_grpc


def _wait_for_channel(addr: str, timeout: float = 30.0):
    """Poll *addr* until a gRPC channel becomes ready and return it.

    Raises RuntimeError if the server does not become ready within
    ``timeout`` seconds. Each failed probe closes its channel before
    retrying so sockets are not leaked.

    Bug fixed vs. the previous inline loop: ``channel`` was assigned
    *before* ``channel_ready_future(...).result()`` could raise, so after
    the deadline ``channel`` was always non-None and the
    "did not start in time" error was unreachable.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        channel = grpc.insecure_channel(addr)
        try:
            grpc.channel_ready_future(channel).result(timeout=2)
            return channel
        except Exception:
            channel.close()
            time.sleep(0.5)
    raise RuntimeError("backend server did not start in time")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="Qwen/Qwen2.5-0.5B-Instruct")
    parser.add_argument("--addr", default="127.0.0.1:50098")
    args = parser.parse_args()

    # Force CPU mode for vLLM
    env = os.environ.copy()
    env.setdefault("VLLM_TARGET_DEVICE", "cpu")
    env.setdefault("VLLM_CPU_KVCACHE_SPACE", "4")

    server_proc = subprocess.Popen(
        [sys.executable, os.path.join(HERE, "backend.py"), "--addr", args.addr],
        env=env,
        stdout=sys.stdout,
        stderr=sys.stderr,
    )

    try:
        # Wait for the server to come up (raises if it never does).
        channel = _wait_for_channel(args.addr)
        stub = backend_pb2_grpc.BackendStub(channel)

        print(f"[test] LoadModel({args.model}) with hermes tool_parser", flush=True)
        load_resp = stub.LoadModel(backend_pb2.ModelOptions(
            Model=args.model,
            ContextSize=2048,
            Options=["tool_parser:hermes"],
        ), timeout=900)
        assert load_resp.success, f"LoadModel failed: {load_resp.message}"

        # OpenAI-style tool definition, serialized into PredictOptions.Tools.
        tools = [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                    },
                    "required": ["location"],
                },
            },
        }]

        messages = [
            backend_pb2.Message(role="system", content="You are a helpful assistant. Use the get_weather tool when the user asks about weather."),
            backend_pb2.Message(role="user", content="What's the weather like in Paris, France?"),
        ]

        print("[test] Predict with tool definitions", flush=True)
        # UseTokenizerTemplate lets the backend wire tools into the model's
        # chat template instead of using a raw prompt string.
        reply = stub.Predict(backend_pb2.PredictOptions(
            Messages=messages,
            Tools=json.dumps(tools),
            ToolChoice="auto",
            UseTokenizerTemplate=True,
            Tokens=200,
            Temperature=0.1,
        ), timeout=600)

        text = reply.message.decode("utf-8")
        print(f"[test] Raw message: {text!r}", flush=True)
        print(f"[test] prompt_tokens={reply.prompt_tokens} tokens={reply.tokens}", flush=True)
        print(f"[test] chat_deltas count: {len(reply.chat_deltas)}", flush=True)

        # Gather every ToolCallDelta across all deltas — some backends emit
        # one final delta, others stream incremental pieces in one Reply.
        tool_calls_seen = []
        for delta in reply.chat_deltas:
            print(f"[test] delta.content={delta.content!r}", flush=True)
            print(f"[test] delta.reasoning_content={delta.reasoning_content!r}", flush=True)
            for tc in delta.tool_calls:
                print(f"[test] tool_call idx={tc.index} id={tc.id!r} name={tc.name!r} args={tc.arguments!r}", flush=True)
                tool_calls_seen.append(tc)

        # Verify at least one tool call was extracted
        assert len(tool_calls_seen) > 0, (
            "No tool calls in ChatDelta. "
            f"Raw text was: {text!r}"
        )
        assert any(tc.name == "get_weather" for tc in tool_calls_seen), (
            f"Expected get_weather tool call, got: {[tc.name for tc in tool_calls_seen]}"
        )

        print("[test] Free", flush=True)
        # NOTE(review): was HealthMessage; MemoryUsageData matches the Free
        # request type used (and asserted on) by test_cpu_inference.py —
        # confirm against backend.proto.
        stub.Free(backend_pb2.MemoryUsageData(), timeout=30)

        print("[test] PASS", flush=True)
        return 0

    finally:
        # Always tear the server down, escalating to SIGKILL if needed.
        try:
            server_proc.terminate()
            server_proc.wait(timeout=10)
        except Exception:
            server_proc.kill()


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -29,18 +29,30 @@ import (
|
||||
//
|
||||
// BACKEND_TEST_MODEL_URL HTTP(S) URL of a model file to download before the test.
|
||||
// BACKEND_TEST_MODEL_FILE Path to an already-available model file (skips download).
|
||||
// BACKEND_TEST_MODEL_NAME HuggingFace model id (e.g. "Qwen/Qwen2.5-0.5B-Instruct").
|
||||
// Passed verbatim as ModelOptions.Model; backends like vllm
|
||||
// resolve it themselves and no local file is downloaded.
|
||||
//
|
||||
// Optional:
|
||||
//
|
||||
// BACKEND_TEST_CAPS Comma-separated list of capabilities to exercise.
|
||||
// Supported values: health, load, predict, stream, embeddings.
|
||||
// Supported values: health, load, predict, stream,
|
||||
// embeddings, tools.
|
||||
// Defaults to "health,load,predict,stream".
|
||||
// A backend that only does embeddings would set this to
|
||||
// "health,load,embeddings"; an image/TTS backend that cannot
|
||||
// be driven by a text prompt can set it to "health,load".
|
||||
// "tools" asks the backend to extract a tool call from the
|
||||
// model output into ChatDelta.tool_calls.
|
||||
// BACKEND_TEST_PROMPT Override the prompt used by predict/stream specs.
|
||||
// BACKEND_TEST_CTX_SIZE Override the context size passed to LoadModel (default 512).
|
||||
// BACKEND_TEST_THREADS Override Threads passed to LoadModel (default 4).
|
||||
// BACKEND_TEST_OPTIONS Comma-separated Options[] entries passed to LoadModel,
|
||||
// e.g. "tool_parser:hermes,reasoning_parser:qwen3".
|
||||
// BACKEND_TEST_TOOL_PROMPT Override the user prompt for the tools spec
|
||||
// (default: "What's the weather like in Paris, France?").
|
||||
// BACKEND_TEST_TOOL_NAME Override the function name expected in the tool call
|
||||
// (default: "get_weather").
|
||||
//
|
||||
// The suite is intentionally model-format-agnostic: it only ever passes the
|
||||
// file path to LoadModel, so GGUF, ONNX, safetensors, .bin etc. all work so
|
||||
@@ -51,9 +63,12 @@ const (
|
||||
capPredict = "predict"
|
||||
capStream = "stream"
|
||||
capEmbeddings = "embeddings"
|
||||
capTools = "tools"
|
||||
|
||||
defaultPrompt = "The capital of France is"
|
||||
streamPrompt = "Once upon a time"
|
||||
defaultPrompt = "The capital of France is"
|
||||
streamPrompt = "Once upon a time"
|
||||
defaultToolPrompt = "What's the weather like in Paris, France?"
|
||||
defaultToolName = "get_weather"
|
||||
)
|
||||
|
||||
func defaultCaps() map[string]bool {
|
||||
@@ -87,12 +102,14 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
caps map[string]bool
|
||||
workDir string
|
||||
binaryDir string
|
||||
modelFile string
|
||||
modelFile string // set when a local file is used
|
||||
modelName string // set when a HuggingFace model id is used
|
||||
addr string
|
||||
serverCmd *exec.Cmd
|
||||
conn *grpc.ClientConn
|
||||
client pb.BackendClient
|
||||
prompt string
|
||||
options []string
|
||||
)
|
||||
|
||||
BeforeAll(func() {
|
||||
@@ -101,8 +118,9 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
|
||||
modelURL := os.Getenv("BACKEND_TEST_MODEL_URL")
|
||||
modelFile = os.Getenv("BACKEND_TEST_MODEL_FILE")
|
||||
Expect(modelURL != "" || modelFile != "").To(BeTrue(),
|
||||
"one of BACKEND_TEST_MODEL_URL or BACKEND_TEST_MODEL_FILE must be set")
|
||||
modelName = os.Getenv("BACKEND_TEST_MODEL_NAME")
|
||||
Expect(modelURL != "" || modelFile != "" || modelName != "").To(BeTrue(),
|
||||
"one of BACKEND_TEST_MODEL_URL, BACKEND_TEST_MODEL_FILE, or BACKEND_TEST_MODEL_NAME must be set")
|
||||
|
||||
caps = parseCaps()
|
||||
GinkgoWriter.Printf("Testing image=%q with capabilities=%v\n", image, keys(caps))
|
||||
@@ -112,6 +130,15 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
prompt = defaultPrompt
|
||||
}
|
||||
|
||||
if raw := strings.TrimSpace(os.Getenv("BACKEND_TEST_OPTIONS")); raw != "" {
|
||||
for _, opt := range strings.Split(raw, ",") {
|
||||
opt = strings.TrimSpace(opt)
|
||||
if opt != "" {
|
||||
options = append(options, opt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
workDir, err = os.MkdirTemp("", "backend-e2e-*")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
@@ -122,8 +149,8 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
extractImage(image, binaryDir)
|
||||
Expect(filepath.Join(binaryDir, "run.sh")).To(BeAnExistingFile())
|
||||
|
||||
// Download the model once if not provided.
|
||||
if modelFile == "" {
|
||||
// Download the model once if not provided and no HF name given.
|
||||
if modelFile == "" && modelName == "" {
|
||||
modelFile = filepath.Join(workDir, "model.bin")
|
||||
downloadFile(modelURL, modelFile)
|
||||
}
|
||||
@@ -196,16 +223,27 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
ctxSize := envInt32("BACKEND_TEST_CTX_SIZE", 512)
|
||||
threads := envInt32("BACKEND_TEST_THREADS", 4)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
// Prefer a HuggingFace model id when provided (e.g. for vllm);
|
||||
// otherwise fall back to a downloaded/local file path.
|
||||
modelRef := modelFile
|
||||
var modelPath string
|
||||
if modelName != "" {
|
||||
modelRef = modelName
|
||||
} else {
|
||||
modelPath = modelFile
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
defer cancel()
|
||||
res, err := client.LoadModel(ctx, &pb.ModelOptions{
|
||||
Model: modelFile,
|
||||
ModelFile: modelFile,
|
||||
Model: modelRef,
|
||||
ModelFile: modelPath,
|
||||
ContextSize: ctxSize,
|
||||
Threads: threads,
|
||||
NGPULayers: 0,
|
||||
MMap: true,
|
||||
NBatch: 128,
|
||||
Options: options,
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(res.GetSuccess()).To(BeTrue(), "LoadModel failed: %s", res.GetMessage())
|
||||
@@ -275,6 +313,78 @@ var _ = Describe("Backend container", Ordered, func() {
|
||||
Expect(res.GetEmbeddings()).NotTo(BeEmpty(), "Embedding returned empty vector")
|
||||
GinkgoWriter.Printf("Embedding: %d dims\n", len(res.GetEmbeddings()))
|
||||
})
|
||||
|
||||
	// "tools" capability spec: send Predict with an OpenAI-style function
	// definition and assert the backend extracted a matching tool call into
	// ChatDelta.tool_calls (e.g. vllm loaded with tool_parser:hermes).
	It("extracts tool calls into ChatDelta", func() {
		if !caps[capTools] {
			Skip("tools capability not enabled")
		}

		// Both the user prompt and the expected function name are
		// overridable per backend via environment variables.
		toolPrompt := os.Getenv("BACKEND_TEST_TOOL_PROMPT")
		if toolPrompt == "" {
			toolPrompt = defaultToolPrompt
		}
		toolName := os.Getenv("BACKEND_TEST_TOOL_NAME")
		if toolName == "" {
			toolName = defaultToolName
		}

		// OpenAI tools schema as raw JSON; %q injects the expected
		// function name so the assertion below matches the definition.
		toolsJSON := fmt.Sprintf(`[{
  "type": "function",
  "function": {
    "name": %q,
    "description": "Get the current weather for a location",
    "parameters": {
      "type": "object",
      "properties": {
        "location": {
          "type": "string",
          "description": "The city and state, e.g. San Francisco, CA"
        }
      },
      "required": ["location"]
    }
  }
}]`, toolName)

		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
		defer cancel()
		// UseTokenizerTemplate lets the backend render Messages + Tools
		// through the model's own chat template.
		res, err := client.Predict(ctx, &pb.PredictOptions{
			Messages: []*pb.Message{
				{Role: "system", Content: "You are a helpful assistant. Use the provided tool when the user asks about weather."},
				{Role: "user", Content: toolPrompt},
			},
			Tools:                toolsJSON,
			ToolChoice:           "auto",
			UseTokenizerTemplate: true,
			Tokens:               200,
			Temperature:          0.1,
		})
		Expect(err).NotTo(HaveOccurred())

		// Collect tool calls from every delta — some backends emit a single
		// final delta, others stream incremental pieces in one Reply.
		var toolCalls []*pb.ToolCallDelta
		for _, delta := range res.GetChatDeltas() {
			toolCalls = append(toolCalls, delta.GetToolCalls()...)
		}

		GinkgoWriter.Printf("Tool call: raw=%q deltas=%d tool_calls=%d\n",
			string(res.GetMessage()), len(res.GetChatDeltas()), len(toolCalls))

		Expect(toolCalls).NotTo(BeEmpty(),
			"Predict did not return any ToolCallDelta. raw=%q", string(res.GetMessage()))

		// At least one extracted call must carry the expected function name.
		matched := false
		for _, tc := range toolCalls {
			GinkgoWriter.Printf("  - idx=%d id=%q name=%q args=%q\n",
				tc.GetIndex(), tc.GetId(), tc.GetName(), tc.GetArguments())
			if tc.GetName() == toolName {
				matched = true
			}
		}
		Expect(matched).To(BeTrue(),
			"Expected a tool call named %q in ChatDelta.tool_calls", toolName)
	})
||||
})
|
||||
|
||||
// extractImage runs `docker create` + `docker export` to materialise the image
|
||||
|
||||
Reference in New Issue
Block a user