diff --git a/.agents/adding-backends.md b/.agents/adding-backends.md new file mode 100644 index 000000000..46233469b --- /dev/null +++ b/.agents/adding-backends.md @@ -0,0 +1,143 @@ +# Adding a New Backend + +When adding a new backend to LocalAI, you need to update several files to ensure the backend is properly built, tested, and registered. Here's a step-by-step guide based on the pattern used for adding backends like `moonshine`: + +## 1. Create Backend Directory Structure + +Create the backend directory under the appropriate location: +- **Python backends**: `backend/python/<backend-name>/` +- **Go backends**: `backend/go/<backend-name>/` +- **C++ backends**: `backend/cpp/<backend-name>/` + +For Python backends, you'll typically need: +- `backend.py` - Main gRPC server implementation +- `Makefile` - Build configuration +- `install.sh` - Installation script for dependencies +- `protogen.sh` - Protocol buffer generation script +- `requirements.txt` - Python dependencies +- `run.sh` - Runtime script +- `test.py` / `test.sh` - Test files + +## 2. Add Build Configurations to `.github/workflows/backend.yml` + +Add build matrix entries for each platform/GPU type you want to support. Look at similar backends (e.g., `chatterbox`, `faster-whisper`) for reference. + +**Placement in file:** +- CPU builds: Add after other CPU builds (e.g., after `cpu-chatterbox`) +- CUDA 12 builds: Add after other CUDA 12 builds (e.g., after `gpu-nvidia-cuda-12-chatterbox`) +- CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`) + +**Additional build types you may need:** +- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:6.4.4"` +- Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"` +- L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'` + +## 3. 
Add Backend Metadata to `backend/index.yaml` + +**Step 3a: Add Meta Definition** + +Add a YAML anchor definition in the `## metas` section (around line 2-300). Look for similar backends to use as a template, such as `diffusers` or `chatterbox`. + +**Step 3b: Add Image Entries** + +Add image entries at the end of the file, following the pattern of similar backends such as `diffusers` or `chatterbox`. Include both `latest` (production) and `master` (development) tags. + +## 4. Update the Makefile + +The Makefile needs to be updated in several places to support building and testing the new backend: + +**Step 4a: Add to `.NOTPARALLEL`** + +Add `backends/<backend-name>` to the `.NOTPARALLEL` line (around line 2) to prevent parallel execution conflicts: + +```makefile +.NOTPARALLEL: ... backends/<backend-name> +``` + +**Step 4b: Add to `prepare-test-extra`** + +Add the backend to the `prepare-test-extra` target (around line 312) to prepare it for testing: + +```makefile +prepare-test-extra: protogen-python + ... + $(MAKE) -C backend/python/<backend-name> +``` + +**Step 4c: Add to `test-extra`** + +Add the backend to the `test-extra` target (around line 319) to run its tests: + +```makefile +test-extra: prepare-test-extra + ... + $(MAKE) -C backend/python/<backend-name> test +``` + +**Step 4d: Add Backend Definition** + +Add a backend definition variable in the backend definitions section (around line 428-457). 
The format depends on the backend type: + +**For Python backends with root context** (like `faster-whisper`, `coqui`): +```makefile +BACKEND_<NAME> = <backend-name>|python|.|false|true +``` + +**For Python backends with `./backend` context** (like `chatterbox`, `moonshine`): +```makefile +BACKEND_<NAME> = <backend-name>|python|./backend|false|true +``` + +**For Go backends**: +```makefile +BACKEND_<NAME> = <backend-name>|golang|.|false|true +``` + +**Step 4e: Generate Docker Build Target** + +Add an eval call to generate the docker-build target (around line 480-501): + +```makefile +$(eval $(call generate-docker-build-target,$(BACKEND_<NAME>))) +``` + +**Step 4f: Add to `docker-build-backends`** + +Add `docker-build-<backend-name>` to the `docker-build-backends` target (around line 507): + +```makefile +docker-build-backends: ... docker-build-<backend-name> +``` + +**Determining the Context:** + +- If the backend is in `backend/python/<backend-name>/` and uses `./backend` as context in the workflow file, use `./backend` context +- If the backend is in `backend/python/<backend-name>/` but uses `.` as context in the workflow file, use `.` context +- Check similar backends to determine the correct context + +## 5. Verification Checklist + +After adding a new backend, verify: + +- [ ] Backend directory structure is complete with all necessary files +- [ ] Build configurations added to `.github/workflows/backend.yml` for all desired platforms +- [ ] Meta definition added to `backend/index.yaml` in the `## metas` section +- [ ] Image entries added to `backend/index.yaml` for all build variants (latest + development) +- [ ] Tag suffixes match between workflow file and index.yaml +- [ ] Makefile updated with all 6 required changes (`.NOTPARALLEL`, `prepare-test-extra`, `test-extra`, backend definition, docker-build target eval, `docker-build-backends`) +- [ ] No YAML syntax errors (check with linter) +- [ ] No Makefile syntax errors (check with linter) +- [ ] Follows the same pattern as similar backends (e.g., if it's a transcription backend, follow `faster-whisper` pattern) + +## 6. 
Example: Adding a Python Backend + +For reference, when `moonshine` was added: +- **Files created**: `backend/python/moonshine/{backend.py, Makefile, install.sh, protogen.sh, requirements.txt, run.sh, test.py, test.sh}` +- **Workflow entries**: 3 build configurations (CPU, CUDA 12, CUDA 13) +- **Index entries**: 1 meta definition + 6 image entries (cpu, cuda12, cuda13 x latest/development) +- **Makefile updates**: + - Added to `.NOTPARALLEL` line + - Added to `prepare-test-extra` and `test-extra` targets + - Added `BACKEND_MOONSHINE = moonshine|python|./backend|false|true` + - Added eval for docker-build target generation + - Added `docker-build-moonshine` to `docker-build-backends` diff --git a/.agents/building-and-testing.md b/.agents/building-and-testing.md new file mode 100644 index 000000000..2f5488fa4 --- /dev/null +++ b/.agents/building-and-testing.md @@ -0,0 +1,16 @@ +# Build and Testing + +Building and testing the project depends on the components involved and the platform where development is taking place. Due to the amount of context required, it's usually best not to try building or testing the project unless the user requests it. If you must build the project, then inspect the Makefile in the project root and the Makefiles of any backends that are affected by changes you are making. In addition the workflows in .github/workflows can be used as a reference when it is unclear how to build or test a component. The primary Makefile contains targets for building inside or outside Docker; if the user has not previously specified a preference then ask which they would like to use. + +## Building a specified backend + +Let's say the user wants to build a particular backend for a given platform. For example, let's say they want to build coqui for ROCM/hipblas. + +- The Makefile has targets like `docker-build-coqui` created with `generate-docker-build-target` at the time of writing. Recently added backends may require a new target. 
+- At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args + - Use .github/workflows/backend.yml as a reference; it lists the needed args in the `include` job strategy matrix + - l4t and cublas also require the CUDA major and minor version +- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-coqui` +- Unless the user specifies that they want you to run the command, just print it, because not all agent frontends handle long-running jobs well and the output may overflow your context +- The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL, or NVIDIA instead of l4t or cublas. Ask for confirmation if there is ambiguity. +- Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly. diff --git a/.agents/coding-style.md b/.agents/coding-style.md new file mode 100644 index 000000000..7cc23b569 --- /dev/null +++ b/.agents/coding-style.md @@ -0,0 +1,51 @@ +# Coding Style + +The project has the following .editorconfig: + +``` +root = true + +[*] +indent_style = space +indent_size = 2 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.go] +indent_style = tab + +[Makefile] +indent_style = tab + +[*.proto] +indent_size = 2 + +[*.py] +indent_size = 4 + +[*.js] +indent_size = 2 + +[*.yaml] +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false +``` + +- Use comments sparingly to explain why code does something, not what it does. Comments are there to add context that would be difficult to deduce from reading the code. +- Prefer modern Go e.g. use `any` not `interface{}` + +## Logging + +Use `github.com/mudler/xlog` for logging, which has the same API as slog. 
+ +## Documentation + +The project documentation is located in `docs/content`. When adding new features or changing existing functionality, it is crucial to update the documentation to reflect these changes. This helps users understand how to use the new capabilities and ensures the documentation stays relevant. + +- **Feature Documentation**: If you add a new feature (like a new backend or API endpoint), create a new markdown file in `docs/content/features/` explaining what it is, how to configure it, and how to use it. +- **Configuration**: If you modify configuration options, update the relevant sections in `docs/content/`. +- **Examples**: providing concrete examples (like YAML configuration blocks) is highly encouraged to help users get started quickly. diff --git a/.agents/llama-cpp-backend.md b/.agents/llama-cpp-backend.md new file mode 100644 index 000000000..1fc5765c9 --- /dev/null +++ b/.agents/llama-cpp-backend.md @@ -0,0 +1,77 @@ +# llama.cpp Backend + +The llama.cpp backend (`backend/cpp/llama-cpp/grpc-server.cpp`) is a gRPC adaptation of the upstream HTTP server (`llama.cpp/tools/server/server.cpp`). It uses the same underlying server infrastructure from `llama.cpp/tools/server/server-context.cpp`. + +## Building and Testing + +- Test llama.cpp backend compilation: `make backends/llama-cpp` +- The backend is built as part of the main build process +- Check `backend/cpp/llama-cpp/Makefile` for build configuration + +## Architecture + +- **grpc-server.cpp**: gRPC server implementation, adapts HTTP server patterns to gRPC +- Uses shared server infrastructure: `server-context.cpp`, `server-task.cpp`, `server-queue.cpp`, `server-common.cpp` +- The gRPC server mirrors the HTTP server's functionality but uses gRPC instead of HTTP + +## Common Issues When Updating llama.cpp + +When fixing compilation errors after upstream changes: +1. Check how `server.cpp` (HTTP server) handles the same change +2. Look for new public APIs or getter methods +3. 
Store copies of needed data instead of accessing private members +4. Update function calls to match new signatures +5. Test with `make backends/llama-cpp` + +## Key Differences from HTTP Server + +- gRPC uses `BackendServiceImpl` class with gRPC service methods +- HTTP server uses `server_routes` with HTTP handlers +- Both use the same `server_context` and task queue infrastructure +- gRPC methods: `LoadModel`, `Predict`, `PredictStream`, `Embedding`, `Rerank`, `TokenizeString`, `GetMetrics`, `Health` + +## Tool Call Parsing Maintenance + +When working on JSON/XML tool call parsing functionality, always check llama.cpp for reference implementation and updates: + +### Checking for XML Parsing Changes + +1. **Review XML Format Definitions**: Check `llama.cpp/common/chat-parser-xml-toolcall.h` for `xml_tool_call_format` struct changes +2. **Review Parsing Logic**: Check `llama.cpp/common/chat-parser-xml-toolcall.cpp` for parsing algorithm updates +3. **Review Format Presets**: Check `llama.cpp/common/chat-parser.cpp` for new XML format presets (search for `xml_tool_call_format form`) +4. **Review Model Lists**: Check `llama.cpp/common/chat.h` for `COMMON_CHAT_FORMAT_*` enum values that use XML parsing: + - `COMMON_CHAT_FORMAT_GLM_4_5` + - `COMMON_CHAT_FORMAT_MINIMAX_M2` + - `COMMON_CHAT_FORMAT_KIMI_K2` + - `COMMON_CHAT_FORMAT_QWEN3_CODER_XML` + - `COMMON_CHAT_FORMAT_APRIEL_1_5` + - `COMMON_CHAT_FORMAT_XIAOMI_MIMO` + - Any new formats added + +### Model Configuration Options + +Always check `llama.cpp` for new model configuration options that should be supported in LocalAI: + +1. **Check Server Context**: Review `llama.cpp/tools/server/server-context.cpp` for new parameters +2. **Check Chat Params**: Review `llama.cpp/common/chat.h` for `common_chat_params` struct changes +3. **Check Server Options**: Review `llama.cpp/tools/server/server.cpp` for command-line argument changes +4. 
**Examples of options to check**: + - `ctx_shift` - Context shifting support + - `parallel_tool_calls` - Parallel tool calling + - `reasoning_format` - Reasoning format options + - Any new flags or parameters + +### Implementation Guidelines + +1. **Feature Parity**: Always aim for feature parity with llama.cpp's implementation +2. **Test Coverage**: Add tests for new features matching llama.cpp's behavior +3. **Documentation**: Update relevant documentation when adding new formats or options +4. **Backward Compatibility**: Ensure changes don't break existing functionality + +### Files to Monitor + +- `llama.cpp/common/chat-parser-xml-toolcall.h` - Format definitions +- `llama.cpp/common/chat-parser-xml-toolcall.cpp` - Parsing logic +- `llama.cpp/common/chat-parser.cpp` - Format presets and model-specific handlers +- `llama.cpp/common/chat.h` - Format enums and parameter structures +- `llama.cpp/tools/server/server-context.cpp` - Server configuration options diff --git a/.agents/testing-mcp-apps.md b/.agents/testing-mcp-apps.md new file mode 100644 index 000000000..c744f2c99 --- /dev/null +++ b/.agents/testing-mcp-apps.md @@ -0,0 +1,120 @@ +# Testing MCP Apps (Interactive Tool UIs) + +MCP Apps is an extension to MCP where tools declare interactive HTML UIs via `_meta.ui.resourceUri`. When the LLM calls such a tool, the UI renders the app in a sandboxed iframe inline in the chat. The app communicates bidirectionally with the host via `postMessage` (JSON-RPC) and can call server tools, send messages, and update model context. + +Spec: https://modelcontextprotocol.io/extensions/apps/overview + +## Quick Start: Run a Test MCP App Server + +The `@modelcontextprotocol/server-basic-react` npm package is a ready-to-use test server that exposes a `get-time` tool with an interactive React clock UI. 
It requires Node >= 20, so run it in Docker: + +```bash +docker run -d --name mcp-app-test -p 3001:3001 node:22-slim \ + sh -c 'npx -y @modelcontextprotocol/server-basic-react' +``` + +Wait ~10 seconds for it to start, then verify: + +```bash +# Check it's running +docker logs mcp-app-test +# Expected: "MCP server listening on http://localhost:3001/mcp" + +# Verify MCP protocol works +curl -s -X POST http://localhost:3001/mcp \ + -H 'Content-Type: application/json' \ + -H 'Accept: application/json, text/event-stream' \ + -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}' + +# List tools — should show get-time with _meta.ui.resourceUri +curl -s -X POST http://localhost:3001/mcp \ + -H 'Content-Type: application/json' \ + -H 'Accept: application/json, text/event-stream' \ + -d '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}' +``` + +The `tools/list` response should contain: +```json +{ + "name": "get-time", + "_meta": { + "ui": { "resourceUri": "ui://get-time/mcp-app.html" } + } +} +``` + +## Testing in LocalAI's UI + +1. Make sure LocalAI is running (e.g. `http://localhost:8080`) +2. Build the React UI: `cd core/http/react-ui && npm install && npm run build` +3. Open the Chat page in your browser +4. Click **"Client MCP"** in the chat header +5. Add a new client MCP server: + - **URL**: `http://localhost:3001/mcp` + - **Use CORS proxy**: enabled (default) — required because the browser can't hit `localhost:3001` directly due to CORS; LocalAI's proxy at `/api/cors-proxy` handles it +6. The server should connect and discover the `get-time` tool +7. Select a model and send: **"What time is it?"** +8. The LLM should call the `get-time` tool +9. 
The tool result should render the interactive React clock app in an iframe as a standalone chat message (not inside the collapsed activity group) + +## What to Verify + +- [ ] Tool appears in the connected tools list (not filtered — `get-time` is callable by the LLM) +- [ ] The iframe renders as a standalone chat message with a puzzle-piece icon +- [ ] The app loads and is interactive (clock UI, buttons work) +- [ ] No "Reconnect to MCP server" overlay (connection is live) +- [ ] Console logs show bidirectional communication: + - `tools/call` messages from app to host (app calling server tools) + - `ui/message` notifications (app sending messages) +- [ ] After the app renders, the LLM continues and produces a text response with the time +- [ ] Non-UI tools continue to work normally (text-only results) +- [ ] Page reload shows the HTML statically with a reconnect overlay until you reconnect + +## Console Log Patterns + +Healthy bidirectional communication looks like: + +``` +Parsed message { jsonrpc: "2.0", id: N, result: {...} } // Bridge init +get-time result: { content: [...] } // Tool result received +Calling get-time tool... // App calls tool +Sending message { method: "tools/call", ... } // App -> host -> server +Parsed message { jsonrpc: "2.0", id: N, result: {...} } // Server response +Sending message text to Host: ... // App sends message +Sending message { method: "ui/message", ... } // Message notification +Message accepted // Host acknowledged +``` + +Benign warnings to ignore: +- `Source map error: ... 
about:srcdoc` — browser devtools can't find source maps for srcdoc iframes +- `Ignoring message from unknown source` — duplicate postMessage from iframe navigation +- `notifications/cancelled` — app cleaning up previous requests + +## Architecture Notes + +- **No server-side changes needed** — the MCP App protocol runs entirely in the browser +- `PostMessageTransport` wraps `window.postMessage` between host and `srcdoc` iframe +- `AppBridge` (from `@modelcontextprotocol/ext-apps`) auto-forwards `tools/call`, `resources/read`, `resources/list` from the app to the MCP server via the host's `Client` +- The iframe uses `sandbox="allow-scripts allow-forms"` (no `allow-same-origin`) — opaque origin, no access to host cookies/DOM/localStorage +- App-only tools (`_meta.ui.visibility: "app-only"`) are filtered from the LLM's tool list but remain callable by the app iframe + +## Key Files + +- `core/http/react-ui/src/components/MCPAppFrame.jsx` — iframe + AppBridge component +- `core/http/react-ui/src/hooks/useMCPClient.js` — MCP client hook with app UI helpers (`hasAppUI`, `getAppResource`, `getClientForTool`, `getToolDefinition`) +- `core/http/react-ui/src/hooks/useChat.js` — agentic loop, attaches `appUI` to tool_result messages +- `core/http/react-ui/src/pages/Chat.jsx` — renders MCPAppFrame as standalone chat messages + +## Other Test Servers + +The `@modelcontextprotocol/ext-apps` repo has many example servers: +- `@modelcontextprotocol/server-basic-react` — simple clock (React) +- More examples at https://github.com/modelcontextprotocol/ext-apps/tree/main/examples + +All examples support both stdio and HTTP transport. Run without `--stdio` for HTTP mode on port 3001. 
+ +## Cleanup + +```bash +docker rm -f mcp-app-test +``` diff --git a/AGENTS.md b/AGENTS.md index d8b902ebe..f2cfb71ad 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,290 +1,22 @@ -# Build and testing +# LocalAI Agent Instructions -Building and testing the project depends on the components involved and the platform where development is taking place. Due to the amount of context required it's usually best not to try building or testing the project unless the user requests it. If you must build the project then inspect the Makefile in the project root and the Makefiles of any backends that are effected by changes you are making. In addition the workflows in .github/workflows can be used as a reference when it is unclear how to build or test a component. The primary Makefile contains targets for building inside or outside Docker, if the user has not previously specified a preference then ask which they would like to use. +This file is an index to detailed topic guides in the `.agents/` directory. Read the relevant file(s) for the task at hand — you don't need to load all of them. -## Building a specified backend +## Topics -Let's say the user wants to build a particular backend for a given platform. 
For example let's say they want to build coqui for ROCM/hipblas +| File | When to read | +|------|-------------| +| [.agents/building-and-testing.md](.agents/building-and-testing.md) | Building the project, running tests, Docker builds for specific platforms | +| [.agents/adding-backends.md](.agents/adding-backends.md) | Adding a new backend (Python, Go, or C++) — full step-by-step checklist | +| [.agents/coding-style.md](.agents/coding-style.md) | Code style, editorconfig, logging, documentation conventions | +| [.agents/llama-cpp-backend.md](.agents/llama-cpp-backend.md) | Working on the llama.cpp backend — architecture, updating, tool call parsing | +| [.agents/testing-mcp-apps.md](.agents/testing-mcp-apps.md) | Testing MCP Apps (interactive tool UIs) in the React UI | -- The Makefile has targets like `docker-build-coqui` created with `generate-docker-build-target` at the time of writing. Recently added backends may require a new target. -- At a minimum we need to set the BUILD_TYPE, BASE_IMAGE build-args - - Use .github/workflows/backend.yml as a reference it lists the needed args in the `include` job strategy matrix - - l4t and cublas also requires the CUDA major and minor version -- You can pretty print a command like `DOCKER_MAKEFLAGS=-j$(nproc --ignore=1) BUILD_TYPE=hipblas BASE_IMAGE=rocm/dev-ubuntu-24.04:6.4.4 make docker-build-coqui` -- Unless the user specifies that they want you to run the command, then just print it because not all agent frontends handle long running jobs well and the output may overflow your context -- The user may say they want to build AMD or ROCM instead of hipblas, or Intel instead of SYCL or NVIDIA insted of l4t or cublas. Ask for confirmation if there is ambiguity. -- Sometimes the user may need extra parameters to be added to `docker build` (e.g. `--platform` for cross-platform builds or `--progress` to view the full logs), in which case you can generate the `docker build` command directly. 
+## Quick Reference -## Adding a New Backend - -When adding a new backend to LocalAI, you need to update several files to ensure the backend is properly built, tested, and registered. Here's a step-by-step guide based on the pattern used for adding backends like `moonshine`: - -### 1. Create Backend Directory Structure - -Create the backend directory under the appropriate location: -- **Python backends**: `backend/python//` -- **Go backends**: `backend/go//` -- **C++ backends**: `backend/cpp//` - -For Python backends, you'll typically need: -- `backend.py` - Main gRPC server implementation -- `Makefile` - Build configuration -- `install.sh` - Installation script for dependencies -- `protogen.sh` - Protocol buffer generation script -- `requirements.txt` - Python dependencies -- `run.sh` - Runtime script -- `test.py` / `test.sh` - Test files - -### 2. Add Build Configurations to `.github/workflows/backend.yml` - -Add build matrix entries for each platform/GPU type you want to support. Look at similar backends (e.g., `chatterbox`, `faster-whisper`) for reference. - -**Placement in file:** -- CPU builds: Add after other CPU builds (e.g., after `cpu-chatterbox`) -- CUDA 12 builds: Add after other CUDA 12 builds (e.g., after `gpu-nvidia-cuda-12-chatterbox`) -- CUDA 13 builds: Add after other CUDA 13 builds (e.g., after `gpu-nvidia-cuda-13-chatterbox`) - -**Additional build types you may need:** -- ROCm/HIP: Use `build-type: 'hipblas'` with `base-image: "rocm/dev-ubuntu-24.04:6.4.4"` -- Intel/SYCL: Use `build-type: 'intel'` or `build-type: 'sycl_f16'`/`sycl_f32` with `base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"` -- L4T (ARM): Use `build-type: 'l4t'` with `platforms: 'linux/arm64'` and `runs-on: 'ubuntu-24.04-arm'` - -### 3. Add Backend Metadata to `backend/index.yaml` - -**Step 3a: Add Meta Definition** - -Add a YAML anchor definition in the `## metas` section (around line 2-300). 
Look for similar backends to use as a template such as `diffusers` or `chatterbox` - -**Step 3b: Add Image Entries** - -Add image entries at the end of the file, following the pattern of similar backends such as `diffusers` or `chatterbox`. Include both `latest` (production) and `master` (development) tags. - -### 4. Update the Makefile - -The Makefile needs to be updated in several places to support building and testing the new backend: - -**Step 4a: Add to `.NOTPARALLEL`** - -Add `backends/` to the `.NOTPARALLEL` line (around line 2) to prevent parallel execution conflicts: - -```makefile -.NOTPARALLEL: ... backends/ -``` - -**Step 4b: Add to `prepare-test-extra`** - -Add the backend to the `prepare-test-extra` target (around line 312) to prepare it for testing: - -```makefile -prepare-test-extra: protogen-python - ... - $(MAKE) -C backend/python/ -``` - -**Step 4c: Add to `test-extra`** - -Add the backend to the `test-extra` target (around line 319) to run its tests: - -```makefile -test-extra: prepare-test-extra - ... - $(MAKE) -C backend/python/ test -``` - -**Step 4d: Add Backend Definition** - -Add a backend definition variable in the backend definitions section (around line 428-457). The format depends on the backend type: - -**For Python backends with root context** (like `faster-whisper`, `coqui`): -```makefile -BACKEND_ = |python|.|false|true -``` - -**For Python backends with `./backend` context** (like `chatterbox`, `moonshine`): -```makefile -BACKEND_ = |python|./backend|false|true -``` - -**For Go backends**: -```makefile -BACKEND_ = |golang|.|false|true -``` - -**Step 4e: Generate Docker Build Target** - -Add an eval call to generate the docker-build target (around line 480-501): - -```makefile -$(eval $(call generate-docker-build-target,$(BACKEND_))) -``` - -**Step 4f: Add to `docker-build-backends`** - -Add `docker-build-` to the `docker-build-backends` target (around line 507): - -```makefile -docker-build-backends: ... 
docker-build- -``` - -**Determining the Context:** - -- If the backend is in `backend/python//` and uses `./backend` as context in the workflow file, use `./backend` context -- If the backend is in `backend/python//` but uses `.` as context in the workflow file, use `.` context -- Check similar backends to determine the correct context - -### 5. Verification Checklist - -After adding a new backend, verify: - -- [ ] Backend directory structure is complete with all necessary files -- [ ] Build configurations added to `.github/workflows/backend.yml` for all desired platforms -- [ ] Meta definition added to `backend/index.yaml` in the `## metas` section -- [ ] Image entries added to `backend/index.yaml` for all build variants (latest + development) -- [ ] Tag suffixes match between workflow file and index.yaml -- [ ] Makefile updated with all 6 required changes (`.NOTPARALLEL`, `prepare-test-extra`, `test-extra`, backend definition, docker-build target eval, `docker-build-backends`) -- [ ] No YAML syntax errors (check with linter) -- [ ] No Makefile syntax errors (check with linter) -- [ ] Follows the same pattern as similar backends (e.g., if it's a transcription backend, follow `faster-whisper` pattern) - -### 6. 
Example: Adding a Python Backend - -For reference, when `moonshine` was added: -- **Files created**: `backend/python/moonshine/{backend.py, Makefile, install.sh, protogen.sh, requirements.txt, run.sh, test.py, test.sh}` -- **Workflow entries**: 3 build configurations (CPU, CUDA 12, CUDA 13) -- **Index entries**: 1 meta definition + 6 image entries (cpu, cuda12, cuda13 × latest/development) -- **Makefile updates**: - - Added to `.NOTPARALLEL` line - - Added to `prepare-test-extra` and `test-extra` targets - - Added `BACKEND_MOONSHINE = moonshine|python|./backend|false|true` - - Added eval for docker-build target generation - - Added `docker-build-moonshine` to `docker-build-backends` - -# Coding style - -- The project has the following .editorconfig - -``` -root = true - -[*] -indent_style = space -indent_size = 2 -end_of_line = lf -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true - -[*.go] -indent_style = tab - -[Makefile] -indent_style = tab - -[*.proto] -indent_size = 2 - -[*.py] -indent_size = 4 - -[*.js] -indent_size = 2 - -[*.yaml] -indent_size = 2 - -[*.md] -trim_trailing_whitespace = false -``` - -- Use comments sparingly to explain why code does something, not what it does. Comments are there to add context that would be difficult to deduce from reading the code. -- Prefer modern Go e.g. use `any` not `interface{}` - -# Logging - -Use `github.com/mudler/xlog` for logging which has the same API as slog. - -# llama.cpp Backend - -The llama.cpp backend (`backend/cpp/llama-cpp/grpc-server.cpp`) is a gRPC adaptation of the upstream HTTP server (`llama.cpp/tools/server/server.cpp`). It uses the same underlying server infrastructure from `llama.cpp/tools/server/server-context.cpp`. 
- -## Building and Testing - -- Test llama.cpp backend compilation: `make backends/llama-cpp` -- The backend is built as part of the main build process -- Check `backend/cpp/llama-cpp/Makefile` for build configuration - -## Architecture - -- **grpc-server.cpp**: gRPC server implementation, adapts HTTP server patterns to gRPC -- Uses shared server infrastructure: `server-context.cpp`, `server-task.cpp`, `server-queue.cpp`, `server-common.cpp` -- The gRPC server mirrors the HTTP server's functionality but uses gRPC instead of HTTP - -## Common Issues When Updating llama.cpp - -When fixing compilation errors after upstream changes: -1. Check how `server.cpp` (HTTP server) handles the same change -2. Look for new public APIs or getter methods -3. Store copies of needed data instead of accessing private members -4. Update function calls to match new signatures -5. Test with `make backends/llama-cpp` - -## Key Differences from HTTP Server - -- gRPC uses `BackendServiceImpl` class with gRPC service methods -- HTTP server uses `server_routes` with HTTP handlers -- Both use the same `server_context` and task queue infrastructure -- gRPC methods: `LoadModel`, `Predict`, `PredictStream`, `Embedding`, `Rerank`, `TokenizeString`, `GetMetrics`, `Health` - -## Tool Call Parsing Maintenance - -When working on JSON/XML tool call parsing functionality, always check llama.cpp for reference implementation and updates: - -### Checking for XML Parsing Changes - -1. **Review XML Format Definitions**: Check `llama.cpp/common/chat-parser-xml-toolcall.h` for `xml_tool_call_format` struct changes -2. **Review Parsing Logic**: Check `llama.cpp/common/chat-parser-xml-toolcall.cpp` for parsing algorithm updates -3. **Review Format Presets**: Check `llama.cpp/common/chat-parser.cpp` for new XML format presets (search for `xml_tool_call_format form`) -4. 
**Review Model Lists**: Check `llama.cpp/common/chat.h` for `COMMON_CHAT_FORMAT_*` enum values that use XML parsing: - - `COMMON_CHAT_FORMAT_GLM_4_5` - - `COMMON_CHAT_FORMAT_MINIMAX_M2` - - `COMMON_CHAT_FORMAT_KIMI_K2` - - `COMMON_CHAT_FORMAT_QWEN3_CODER_XML` - - `COMMON_CHAT_FORMAT_APRIEL_1_5` - - `COMMON_CHAT_FORMAT_XIAOMI_MIMO` - - Any new formats added - -### Model Configuration Options - -Always check `llama.cpp` for new model configuration options that should be supported in LocalAI: - -1. **Check Server Context**: Review `llama.cpp/tools/server/server-context.cpp` for new parameters -2. **Check Chat Params**: Review `llama.cpp/common/chat.h` for `common_chat_params` struct changes -3. **Check Server Options**: Review `llama.cpp/tools/server/server.cpp` for command-line argument changes -4. **Examples of options to check**: - - `ctx_shift` - Context shifting support - - `parallel_tool_calls` - Parallel tool calling - - `reasoning_format` - Reasoning format options - - Any new flags or parameters - -### Implementation Guidelines - -1. **Feature Parity**: Always aim for feature parity with llama.cpp's implementation -2. **Test Coverage**: Add tests for new features matching llama.cpp's behavior -3. **Documentation**: Update relevant documentation when adding new formats or options -4. **Backward Compatibility**: Ensure changes don't break existing functionality - -### Files to Monitor - -- `llama.cpp/common/chat-parser-xml-toolcall.h` - Format definitions -- `llama.cpp/common/chat-parser-xml-toolcall.cpp` - Parsing logic -- `llama.cpp/common/chat-parser.cpp` - Format presets and model-specific handlers -- `llama.cpp/common/chat.h` - Format enums and parameter structures -- `llama.cpp/tools/server/server-context.cpp` - Server configuration options - -# Documentation - -The project documentation is located in `docs/content`. When adding new features or changing existing functionality, it is crucial to update the documentation to reflect these changes. 
This helps users understand how to use the new capabilities and ensures the documentation stays relevant. - -- **Feature Documentation**: If you add a new feature (like a new backend or API endpoint), create a new markdown file in `docs/content/features/` explaining what it is, how to configure it, and how to use it. -- **Configuration**: If you modify configuration options, update the relevant sections in `docs/content/`. -- **Examples**: providing concrete examples (like YAML configuration blocks) is highly encouraged to help users get started quickly. +- **Logging**: Use `github.com/mudler/xlog` (same API as slog) +- **Go style**: Prefer `any` over `interface{}` +- **Comments**: Explain *why*, not *what* +- **Docs**: Update `docs/content/` when adding features or changing config +- **Build**: Inspect `Makefile` and `.github/workflows/` — ask the user before running long builds +- **UI**: The active UI is the React app in `core/http/react-ui/`. The older Alpine.js/HTML UI in `core/http/static/` is pending deprecation — all new UI work goes in the React UI diff --git a/README.md b/README.md index 699fed47b..8cbb9df7a 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ local-ai run oci://localai/phi-2:latest For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html), if you are interested in our roadmap items and future enhancements, you can see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) ## 📰 Latest project news -- March 2026: [Agent management](https://github.com/mudler/LocalAI/pull/8820), [New React UI](https://github.com/mudler/LocalAI/pull/8772), [WebRTC](https://github.com/mudler/LocalAI/pull/8790),[MLX-distributed via P2P and RDMA](https://github.com/mudler/LocalAI/pull/8801) +- March 2026: [Agent management](https://github.com/mudler/LocalAI/pull/8820), [New React UI](https://github.com/mudler/LocalAI/pull/8772), 
[WebRTC](https://github.com/mudler/LocalAI/pull/8790), [MLX-distributed via P2P and RDMA](https://github.com/mudler/LocalAI/pull/8801), [MCP Apps, MCP Client-side](https://github.com/mudler/LocalAI/pull/8947) - February 2026: [Realtime API for audio-to-audio with tool calling](https://github.com/mudler/LocalAI/pull/6245), [ACE-Step 1.5 support](https://github.com/mudler/LocalAI/pull/8396) - January 2026: **LocalAI 3.10.0** - Major release with Anthropic API support, Open Responses API for stateful agents, video & image generation suite (LTX-2), unified GPU backends, tool streaming & XML parsing, system-aware backend gallery, crash fixes for AVX-only CPUs and AMD VRAM reporting, request tracing, and new backends: **Moonshine** (ultra-fast transcription), **Pocket-TTS** (lightweight TTS). Vulkan arm64 builds now available. [Release notes](https://github.com/mudler/LocalAI/releases/tag/v3.10.0). - December 2025: [Dynamic Memory Resource reclaimer](https://github.com/mudler/LocalAI/pull/7583), [Automatic fitting of models to multiple GPUS(llama.cpp)](https://github.com/mudler/LocalAI/pull/7584), [Added Vibevoice backend](https://github.com/mudler/LocalAI/pull/7494) diff --git a/core/application/application.go b/core/application/application.go index a95410611..7ad486c25 100644 --- a/core/application/application.go +++ b/core/application/application.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/mudler/LocalAI/core/config" + mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" @@ -29,9 +30,16 @@ type Application struct { } func newApplication(appConfig *config.ApplicationConfig) *Application { + ml := model.NewModelLoader(appConfig.SystemState) + + // Close MCP sessions when a model is unloaded (watchdog eviction, manual shutdown, etc.)
+ ml.OnModelUnload(func(modelName string) { + mcpTools.CloseMCPSessions(modelName) + }) + return &Application{ backendLoader: config.NewModelConfigLoader(appConfig.SystemState.Model.ModelsPath), - modelLoader: model.NewModelLoader(appConfig.SystemState), + modelLoader: ml, applicationConfig: appConfig, templatesEvaluator: templates.NewEvaluator(appConfig.SystemState.Model.ModelsPath), } diff --git a/core/http/endpoints/anthropic/messages.go b/core/http/endpoints/anthropic/messages.go index ab2ecdce9..5119f2df5 100644 --- a/core/http/endpoints/anthropic/messages.go +++ b/core/http/endpoints/anthropic/messages.go @@ -3,11 +3,13 @@ package anthropic import ( "encoding/json" "fmt" + "strings" "github.com/google/uuid" "github.com/labstack/echo/v4" "github.com/mudler/LocalAI/core/backend" "github.com/mudler/LocalAI/core/config" + mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/templates" @@ -48,6 +50,92 @@ func MessagesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evalu // Convert Anthropic tools to internal Functions format funcs, shouldUseFn := convertAnthropicTools(input, cfg) + // MCP injection: prompts, resources, and tools + var mcpToolInfos []mcpTools.MCPToolInfo + mcpServers := mcpTools.MCPServersFromMetadata(input.Metadata) + mcpPromptName, mcpPromptArgs := mcpTools.MCPPromptFromMetadata(input.Metadata) + mcpResourceURIs := mcpTools.MCPResourcesFromMetadata(input.Metadata) + + if (len(mcpServers) > 0 || mcpPromptName != "" || len(mcpResourceURIs) > 0) && (cfg.MCP.Servers != "" || cfg.MCP.Stdio != "") { + remote, stdio, mcpErr := cfg.MCP.MCPConfigFromYAML() + if mcpErr == nil { + namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, mcpServers) + if sessErr == nil && len(namedSessions) > 0 { + // Prompt injection + if mcpPromptName != "" { + prompts, discErr := 
mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions) + if discErr == nil { + promptMsgs, getErr := mcpTools.GetMCPPrompt(c.Request().Context(), prompts, mcpPromptName, mcpPromptArgs) + if getErr == nil { + var injected []schema.Message + for _, pm := range promptMsgs { + injected = append(injected, schema.Message{ + Role: string(pm.Role), + Content: mcpTools.PromptMessageToText(pm), + }) + } + openAIMessages = append(injected, openAIMessages...) + xlog.Debug("Anthropic MCP prompt injected", "prompt", mcpPromptName, "messages", len(injected)) + } else { + xlog.Error("Failed to get MCP prompt", "error", getErr) + } + } + } + + // Resource injection + if len(mcpResourceURIs) > 0 { + resources, discErr := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions) + if discErr == nil { + var resourceTexts []string + for _, uri := range mcpResourceURIs { + content, readErr := mcpTools.ReadMCPResource(c.Request().Context(), resources, uri) + if readErr != nil { + xlog.Error("Failed to read MCP resource", "error", readErr, "uri", uri) + continue + } + name := uri + for _, r := range resources { + if r.URI == uri { + name = r.Name + break + } + } + resourceTexts = append(resourceTexts, fmt.Sprintf("--- MCP Resource: %s ---\n%s", name, content)) + } + if len(resourceTexts) > 0 && len(openAIMessages) > 0 { + lastIdx := len(openAIMessages) - 1 + suffix := "\n\n" + strings.Join(resourceTexts, "\n\n") + switch ct := openAIMessages[lastIdx].Content.(type) { + case string: + openAIMessages[lastIdx].Content = ct + suffix + default: + openAIMessages[lastIdx].Content = fmt.Sprintf("%v%s", ct, suffix) + } + xlog.Debug("Anthropic MCP resources injected", "count", len(resourceTexts)) + } + } + } + + // Tool injection + if len(mcpServers) > 0 { + discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions) + if discErr == nil { + mcpToolInfos = discovered + for _, ti := range mcpToolInfos { + funcs = append(funcs, ti.Function) + } + 
shouldUseFn = len(funcs) > 0 && cfg.ShouldUseFunctions() + xlog.Debug("Anthropic MCP tools injected", "count", len(mcpToolInfos), "total_funcs", len(funcs)) + } else { + xlog.Error("Failed to discover MCP tools", "error", discErr) + } + } + } + } else { + xlog.Error("Failed to parse MCP config", "error", mcpErr) + } + } + // Create an OpenAI-compatible request for internal processing openAIReq := &schema.OpenAIRequest{ PredictionOptions: schema.PredictionOptions{ @@ -88,138 +176,200 @@ func MessagesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evalu xlog.Debug("Anthropic Messages - Prompt (after templating)", "prompt", predInput) if input.Stream { - return handleAnthropicStream(c, id, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn) + return handleAnthropicStream(c, id, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator) } - return handleAnthropicNonStream(c, id, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn) + return handleAnthropicNonStream(c, id, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator) } } -func handleAnthropicNonStream(c echo.Context, id string, input *schema.AnthropicRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool) error { - images := []string{} - for _, m := range openAIReq.Messages { - images = append(images, m.StringImages...) 
+func handleAnthropicNonStream(c echo.Context, id string, input *schema.AnthropicRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) error { + mcpMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpMaxIterations = cfg.Agent.MaxIterations } + hasMCPTools := len(mcpToolInfos) > 0 - toolsJSON := "" - if len(funcs) > 0 { - openAITools := make([]functions.Tool, len(funcs)) - for i, f := range funcs { - openAITools[i] = functions.Tool{Type: "function", Function: f} + for mcpIteration := 0; mcpIteration <= mcpMaxIterations; mcpIteration++ { + // Re-template on each MCP iteration since messages may have changed + if mcpIteration > 0 { + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + xlog.Debug("Anthropic MCP re-templating", "iteration", mcpIteration, "prompt_len", len(predInput)) } - if toolsBytes, err := json.Marshal(openAITools); err == nil { - toolsJSON = string(toolsBytes) - } - } - toolChoiceJSON := "" - if input.ToolChoice != nil { - if toolChoiceBytes, err := json.Marshal(input.ToolChoice); err == nil { - toolChoiceJSON = string(toolChoiceBytes) - } - } - predFunc, err := backend.ModelInference( - input.Context, predInput, openAIReq.Messages, images, nil, nil, ml, cfg, cl, appConfig, nil, toolsJSON, toolChoiceJSON, nil, nil, nil, input.Metadata) - if err != nil { - xlog.Error("Anthropic model inference failed", "error", err) - return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("model inference failed: %v", err)) - } + images := []string{} + for _, m := range openAIReq.Messages { + images = append(images, m.StringImages...) 
+ } - const maxEmptyRetries = 5 - var prediction backend.LLMResponse - var result string - for attempt := 0; attempt <= maxEmptyRetries; attempt++ { - prediction, err = predFunc() - if err != nil { - xlog.Error("Anthropic prediction failed", "error", err) - return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("prediction failed: %v", err)) - } - result = backend.Finetune(*cfg, predInput, prediction.Response) - if result != "" || !shouldUseFn { - break - } - xlog.Warn("Anthropic: retrying prediction due to empty backend response", "attempt", attempt+1, "maxRetries", maxEmptyRetries) - } - - // Try pre-parsed tool calls from C++ autoparser first, fall back to text parsing - var toolCalls []functions.FuncCallResults - if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { - xlog.Debug("[ChatDeltas] Anthropic: using pre-parsed tool calls", "count", len(deltaToolCalls)) - toolCalls = deltaToolCalls - } else { - xlog.Debug("[ChatDeltas] Anthropic: no pre-parsed tool calls, falling back to Go-side text parsing") - toolCalls = functions.ParseFunctionCall(result, cfg.FunctionsConfig) - } - - var contentBlocks []schema.AnthropicContentBlock - var stopReason string - - if shouldUseFn && len(toolCalls) > 0 { - // Model wants to use tools - stopReason = "tool_use" - for _, tc := range toolCalls { - // Parse arguments as JSON - var inputArgs map[string]interface{} - if err := json.Unmarshal([]byte(tc.Arguments), &inputArgs); err != nil { - xlog.Warn("Failed to parse tool call arguments as JSON", "error", err, "args", tc.Arguments) - inputArgs = map[string]interface{}{"raw": tc.Arguments} + toolsJSON := "" + if len(funcs) > 0 { + openAITools := make([]functions.Tool, len(funcs)) + for i, f := range funcs { + openAITools[i] = functions.Tool{Type: "function", Function: f} + } + if toolsBytes, err := json.Marshal(openAITools); err == nil { + toolsJSON = string(toolsBytes) } - - contentBlocks = append(contentBlocks, 
schema.AnthropicContentBlock{ - Type: "tool_use", - ID: fmt.Sprintf("toolu_%s_%d", id, len(contentBlocks)), - Name: tc.Name, - Input: inputArgs, - }) } - - // Add any text content before the tool calls - textContent := functions.ParseTextContent(result, cfg.FunctionsConfig) - if textContent != "" { - // Prepend text block - contentBlocks = append([]schema.AnthropicContentBlock{{Type: "text", Text: textContent}}, contentBlocks...) + toolChoiceJSON := "" + if input.ToolChoice != nil { + if toolChoiceBytes, err := json.Marshal(input.ToolChoice); err == nil { + toolChoiceJSON = string(toolChoiceBytes) + } } - } else { - // Normal text response - stopReason = "end_turn" - contentBlocks = []schema.AnthropicContentBlock{ - {Type: "text", Text: result}, + + predFunc, err := backend.ModelInference( + input.Context, predInput, openAIReq.Messages, images, nil, nil, ml, cfg, cl, appConfig, nil, toolsJSON, toolChoiceJSON, nil, nil, nil, input.Metadata) + if err != nil { + xlog.Error("Anthropic model inference failed", "error", err) + return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("model inference failed: %v", err)) } - } - resp := &schema.AnthropicResponse{ - ID: fmt.Sprintf("msg_%s", id), - Type: "message", - Role: "assistant", - Model: input.Model, - StopReason: &stopReason, - Content: contentBlocks, - Usage: schema.AnthropicUsage{ - InputTokens: prediction.Usage.Prompt, - OutputTokens: prediction.Usage.Completion, - }, - } + const maxEmptyRetries = 5 + var prediction backend.LLMResponse + var result string + for attempt := 0; attempt <= maxEmptyRetries; attempt++ { + prediction, err = predFunc() + if err != nil { + xlog.Error("Anthropic prediction failed", "error", err) + return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("prediction failed: %v", err)) + } + result = backend.Finetune(*cfg, predInput, prediction.Response) + if result != "" || !shouldUseFn { + break + } + xlog.Warn("Anthropic: retrying prediction due to empty backend response", "attempt", 
attempt+1, "maxRetries", maxEmptyRetries) + } - if respData, err := json.Marshal(resp); err == nil { - xlog.Debug("Anthropic Response", "response", string(respData)) - } + // Try pre-parsed tool calls from C++ autoparser first, fall back to text parsing + var toolCalls []functions.FuncCallResults + if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { + xlog.Debug("[ChatDeltas] Anthropic: using pre-parsed tool calls", "count", len(deltaToolCalls)) + toolCalls = deltaToolCalls + } else { + xlog.Debug("[ChatDeltas] Anthropic: no pre-parsed tool calls, falling back to Go-side text parsing") + toolCalls = functions.ParseFunctionCall(result, cfg.FunctionsConfig) + } - return c.JSON(200, resp) + // MCP server-side tool execution: if any tool calls are MCP tools, execute and loop + if hasMCPTools && shouldUseFn && len(toolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range toolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + // Append assistant message with tool_calls to conversation + assistantMsg := schema.Message{ + Role: "assistant", + Content: result, + } + for i, tc := range toolCalls { + toolCallID := tc.ID + if toolCallID == "" { + toolCallID = fmt.Sprintf("toolu_%s_%d", id, i) + } + assistantMsg.ToolCalls = append(assistantMsg.ToolCalls, schema.ToolCall{ + Index: i, + ID: toolCallID, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: tc.Name, + Arguments: tc.Arguments, + }, + }) + } + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) + + // Execute each MCP tool call and append results + for _, tc := range assistantMsg.ToolCalls { + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + xlog.Debug("Executing MCP tool (Anthropic)", "tool", tc.FunctionCall.Name, "iteration", mcpIteration) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + c.Request().Context(), mcpToolInfos, + 
tc.FunctionCall.Name, tc.FunctionCall.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", + Content: toolResult, + StringContent: toolResult, + ToolCallID: tc.ID, + Name: tc.FunctionCall.Name, + }) + } + + xlog.Debug("Anthropic MCP tools executed, re-running inference", "iteration", mcpIteration) + continue // next MCP iteration + } + } + + // No MCP tools to execute, build and return response + var contentBlocks []schema.AnthropicContentBlock + var stopReason string + + if shouldUseFn && len(toolCalls) > 0 { + stopReason = "tool_use" + for _, tc := range toolCalls { + var inputArgs map[string]interface{} + if err := json.Unmarshal([]byte(tc.Arguments), &inputArgs); err != nil { + xlog.Warn("Failed to parse tool call arguments as JSON", "error", err, "args", tc.Arguments) + inputArgs = map[string]interface{}{"raw": tc.Arguments} + } + contentBlocks = append(contentBlocks, schema.AnthropicContentBlock{ + Type: "tool_use", + ID: fmt.Sprintf("toolu_%s_%d", id, len(contentBlocks)), + Name: tc.Name, + Input: inputArgs, + }) + } + textContent := functions.ParseTextContent(result, cfg.FunctionsConfig) + if textContent != "" { + contentBlocks = append([]schema.AnthropicContentBlock{{Type: "text", Text: textContent}}, contentBlocks...) 
+ } + } else { + stopReason = "end_turn" + contentBlocks = []schema.AnthropicContentBlock{ + {Type: "text", Text: result}, + } + } + + resp := &schema.AnthropicResponse{ + ID: fmt.Sprintf("msg_%s", id), + Type: "message", + Role: "assistant", + Model: input.Model, + StopReason: &stopReason, + Content: contentBlocks, + Usage: schema.AnthropicUsage{ + InputTokens: prediction.Usage.Prompt, + OutputTokens: prediction.Usage.Completion, + }, + } + + if respData, err := json.Marshal(resp); err == nil { + xlog.Debug("Anthropic Response", "response", string(respData)) + } + + return c.JSON(200, resp) + } // end MCP iteration loop + + return sendAnthropicError(c, 500, "api_error", "MCP iteration limit reached") } -func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool) error { +func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) error { c.Response().Header().Set("Content-Type", "text/event-stream") c.Response().Header().Set("Cache-Control", "no-cache") c.Response().Header().Set("Connection", "keep-alive") - // Create OpenAI messages for inference - openAIMessages := openAIReq.Messages - - images := []string{} - for _, m := range openAIMessages { - images = append(images, m.StringImages...) 
- } - // Send message_start event messageStart := schema.AnthropicStreamEvent{ Type: "message_start", @@ -234,159 +384,232 @@ func handleAnthropicStream(c echo.Context, id string, input *schema.AnthropicReq } sendAnthropicSSE(c, messageStart) - // Track accumulated content for tool call detection - accumulatedContent := "" - currentBlockIndex := 0 - inToolCall := false - toolCallsEmitted := 0 - - // Send initial content_block_start event - contentBlockStart := schema.AnthropicStreamEvent{ - Type: "content_block_start", - Index: currentBlockIndex, - ContentBlock: &schema.AnthropicContentBlock{Type: "text", Text: ""}, + mcpMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpMaxIterations = cfg.Agent.MaxIterations } - sendAnthropicSSE(c, contentBlockStart) + hasMCPTools := len(mcpToolInfos) > 0 - // Stream content deltas - tokenCallback := func(token string, usage backend.TokenUsage) bool { - accumulatedContent += token - - // If we're using functions, try to detect tool calls incrementally - if shouldUseFn { - cleanedResult := functions.CleanupLLMResult(accumulatedContent, cfg.FunctionsConfig) - - // Try parsing for tool calls - toolCalls := functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig) - - // If we detected new tool calls and haven't emitted them yet - if len(toolCalls) > toolCallsEmitted { - // Stop the current text block if we were in one - if !inToolCall && currentBlockIndex == 0 { - sendAnthropicSSE(c, schema.AnthropicStreamEvent{ - Type: "content_block_stop", - Index: currentBlockIndex, - }) - currentBlockIndex++ - inToolCall = true + for mcpIteration := 0; mcpIteration <= mcpMaxIterations; mcpIteration++ { + // Re-template on MCP iterations + if mcpIteration > 0 { + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + xlog.Debug("Anthropic MCP stream re-templating", "iteration", mcpIteration) + } + + openAIMessages := openAIReq.Messages + images := []string{} + for _, m := range 
openAIMessages { + images = append(images, m.StringImages...) + } + + // Track accumulated content for tool call detection + accumulatedContent := "" + currentBlockIndex := 0 + inToolCall := false + toolCallsEmitted := 0 + + // Send initial content_block_start event + contentBlockStart := schema.AnthropicStreamEvent{ + Type: "content_block_start", + Index: currentBlockIndex, + ContentBlock: &schema.AnthropicContentBlock{Type: "text", Text: ""}, + } + sendAnthropicSSE(c, contentBlockStart) + + // Collect tool calls for MCP execution + var collectedToolCalls []functions.FuncCallResults + + tokenCallback := func(token string, usage backend.TokenUsage) bool { + accumulatedContent += token + + if shouldUseFn { + cleanedResult := functions.CleanupLLMResult(accumulatedContent, cfg.FunctionsConfig) + toolCalls := functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig) + + if len(toolCalls) > toolCallsEmitted { + if !inToolCall && currentBlockIndex == 0 { + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_stop", + Index: currentBlockIndex, + }) + currentBlockIndex++ + inToolCall = true + } + + for i := toolCallsEmitted; i < len(toolCalls); i++ { + tc := toolCalls[i] + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_start", + Index: currentBlockIndex, + ContentBlock: &schema.AnthropicContentBlock{ + Type: "tool_use", + ID: fmt.Sprintf("toolu_%s_%d", id, i), + Name: tc.Name, + }, + }) + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_delta", + Index: currentBlockIndex, + Delta: &schema.AnthropicStreamDelta{ + Type: "input_json_delta", + PartialJSON: tc.Arguments, + }, + }) + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_stop", + Index: currentBlockIndex, + }) + currentBlockIndex++ + } + collectedToolCalls = toolCalls + toolCallsEmitted = len(toolCalls) + return true } - - // Emit new tool calls - for i := toolCallsEmitted; i < len(toolCalls); i++ { - tc := toolCalls[i] - - 
// Send content_block_start for tool_use - sendAnthropicSSE(c, schema.AnthropicStreamEvent{ - Type: "content_block_start", - Index: currentBlockIndex, - ContentBlock: &schema.AnthropicContentBlock{ - Type: "tool_use", - ID: fmt.Sprintf("toolu_%s_%d", id, i), - Name: tc.Name, - }, - }) - - // Send input_json_delta with the arguments - sendAnthropicSSE(c, schema.AnthropicStreamEvent{ - Type: "content_block_delta", - Index: currentBlockIndex, - Delta: &schema.AnthropicStreamDelta{ - Type: "input_json_delta", - PartialJSON: tc.Arguments, - }, - }) - - // Send content_block_stop - sendAnthropicSSE(c, schema.AnthropicStreamEvent{ - Type: "content_block_stop", - Index: currentBlockIndex, - }) - - currentBlockIndex++ - } - toolCallsEmitted = len(toolCalls) - return true + } + + if !inToolCall { + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_delta", + Index: 0, + Delta: &schema.AnthropicStreamDelta{ + Type: "text_delta", + Text: token, + }, + }) + } + return true + } + + toolsJSON := "" + if len(funcs) > 0 { + openAITools := make([]functions.Tool, len(funcs)) + for i, f := range funcs { + openAITools[i] = functions.Tool{Type: "function", Function: f} + } + if toolsBytes, err := json.Marshal(openAITools); err == nil { + toolsJSON = string(toolsBytes) } } - - // Send regular text delta if not in tool call mode + toolChoiceJSON := "" + if input.ToolChoice != nil { + if toolChoiceBytes, err := json.Marshal(input.ToolChoice); err == nil { + toolChoiceJSON = string(toolChoiceBytes) + } + } + + predFunc, err := backend.ModelInference( + input.Context, predInput, openAIMessages, images, nil, nil, ml, cfg, cl, appConfig, tokenCallback, toolsJSON, toolChoiceJSON, nil, nil, nil, input.Metadata) + if err != nil { + xlog.Error("Anthropic stream model inference failed", "error", err) + return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("model inference failed: %v", err)) + } + + prediction, err := predFunc() + if err != nil { + xlog.Error("Anthropic 
stream prediction failed", "error", err) + return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("prediction failed: %v", err)) + } + + // Also check chat deltas for tool calls + if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 && len(collectedToolCalls) == 0 { + collectedToolCalls = deltaToolCalls + } + + // MCP streaming tool execution: if we collected MCP tool calls, execute and loop + if hasMCPTools && len(collectedToolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range collectedToolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + // Append assistant message with tool_calls + assistantMsg := schema.Message{ + Role: "assistant", + Content: accumulatedContent, + } + for i, tc := range collectedToolCalls { + toolCallID := tc.ID + if toolCallID == "" { + toolCallID = fmt.Sprintf("toolu_%s_%d", id, i) + } + assistantMsg.ToolCalls = append(assistantMsg.ToolCalls, schema.ToolCall{ + Index: i, + ID: toolCallID, + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: tc.Name, + Arguments: tc.Arguments, + }, + }) + } + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) + + // Execute MCP tool calls + for _, tc := range assistantMsg.ToolCalls { + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + xlog.Debug("Executing MCP tool (Anthropic stream)", "tool", tc.FunctionCall.Name, "iteration", mcpIteration) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + c.Request().Context(), mcpToolInfos, + tc.FunctionCall.Name, tc.FunctionCall.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", + Content: toolResult, + StringContent: toolResult, + ToolCallID: tc.ID, + Name: tc.FunctionCall.Name, + }) + 
} + + xlog.Debug("Anthropic MCP streaming tools executed, re-running inference", "iteration", mcpIteration) + continue // next MCP iteration + } + } + + // No MCP tools to execute, close stream if !inToolCall { - delta := schema.AnthropicStreamEvent{ - Type: "content_block_delta", + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "content_block_stop", Index: 0, - Delta: &schema.AnthropicStreamDelta{ - Type: "text_delta", - Text: token, - }, - } - sendAnthropicSSE(c, delta) + }) } - return true - } - toolsJSON := "" - if len(funcs) > 0 { - openAITools := make([]functions.Tool, len(funcs)) - for i, f := range funcs { - openAITools[i] = functions.Tool{Type: "function", Function: f} + stopReason := "end_turn" + if toolCallsEmitted > 0 { + stopReason = "tool_use" } - if toolsBytes, err := json.Marshal(openAITools); err == nil { - toolsJSON = string(toolsBytes) - } - } - toolChoiceJSON := "" - if input.ToolChoice != nil { - if toolChoiceBytes, err := json.Marshal(input.ToolChoice); err == nil { - toolChoiceJSON = string(toolChoiceBytes) - } - } - predFunc, err := backend.ModelInference( - input.Context, predInput, openAIMessages, images, nil, nil, ml, cfg, cl, appConfig, tokenCallback, toolsJSON, toolChoiceJSON, nil, nil, nil, input.Metadata) - if err != nil { - xlog.Error("Anthropic stream model inference failed", "error", err) - return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("model inference failed: %v", err)) - } + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "message_delta", + Delta: &schema.AnthropicStreamDelta{ + StopReason: &stopReason, + }, + Usage: &schema.AnthropicUsage{ + OutputTokens: prediction.Usage.Completion, + }, + }) - prediction, err := predFunc() - if err != nil { - xlog.Error("Anthropic stream prediction failed", "error", err) - return sendAnthropicError(c, 500, "api_error", fmt.Sprintf("prediction failed: %v", err)) - } + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ + Type: "message_stop", + }) - // Send 
content_block_stop event for last block if we didn't close it yet - if !inToolCall { - contentBlockStop := schema.AnthropicStreamEvent{ - Type: "content_block_stop", - Index: 0, - } - sendAnthropicSSE(c, contentBlockStop) - } + return nil + } // end MCP iteration loop - // Determine stop reason - stopReason := "end_turn" - if toolCallsEmitted > 0 { - stopReason = "tool_use" - } - - // Send message_delta event with stop_reason - messageDelta := schema.AnthropicStreamEvent{ - Type: "message_delta", - Delta: &schema.AnthropicStreamDelta{ - StopReason: &stopReason, - }, - Usage: &schema.AnthropicUsage{ - OutputTokens: prediction.Usage.Completion, - }, - } - sendAnthropicSSE(c, messageDelta) - - // Send message_stop event - messageStop := schema.AnthropicStreamEvent{ + // Safety fallback + sendAnthropicSSE(c, schema.AnthropicStreamEvent{ Type: "message_stop", - } - sendAnthropicSSE(c, messageStop) - + }) return nil } diff --git a/core/http/endpoints/localai/cors_proxy.go b/core/http/endpoints/localai/cors_proxy.go new file mode 100644 index 000000000..d776aa3b3 --- /dev/null +++ b/core/http/endpoints/localai/cors_proxy.go @@ -0,0 +1,108 @@ +package localai + +import ( + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/xlog" +) + +var corsProxyClient = &http.Client{ + Timeout: 10 * time.Minute, +} + +// CORSProxyEndpoint proxies HTTP requests to external MCP servers, +// solving CORS issues for browser-based MCP connections. +// The target URL is passed as a query parameter: /api/cors-proxy?url=https://... 
+func CORSProxyEndpoint(appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + targetURL := c.QueryParam("url") + if targetURL == "" { + return c.JSON(http.StatusBadRequest, map[string]string{"error": "missing 'url' query parameter"}) + } + + parsed, err := url.Parse(targetURL) + if err != nil { + return c.JSON(http.StatusBadRequest, map[string]string{"error": "invalid target URL"}) + } + + if parsed.Scheme != "http" && parsed.Scheme != "https" { + return c.JSON(http.StatusBadRequest, map[string]string{"error": "only http and https schemes are supported"}) + } + + xlog.Debug("CORS proxy request", "method", c.Request().Method, "target", targetURL) + + proxyReq, err := http.NewRequestWithContext( + c.Request().Context(), + c.Request().Method, + targetURL, + c.Request().Body, + ) + if err != nil { + return fmt.Errorf("failed to create proxy request: %w", err) + } + + // Copy headers from the original request, excluding hop-by-hop headers + skipHeaders := map[string]bool{ + "Host": true, "Connection": true, "Keep-Alive": true, + "Transfer-Encoding": true, "Upgrade": true, "Origin": true, + "Referer": true, + } + for key, values := range c.Request().Header { + if skipHeaders[key] { + continue + } + for _, v := range values { + proxyReq.Header.Add(key, v) + } + } + + resp, err := corsProxyClient.Do(proxyReq) + if err != nil { + xlog.Error("CORS proxy request failed", "error", err, "target", targetURL) + return c.JSON(http.StatusBadGateway, map[string]string{"error": "proxy request failed: " + err.Error()}) + } + defer resp.Body.Close() + + // Copy response headers + for key, values := range resp.Header { + lower := strings.ToLower(key) + // Skip CORS headers — we'll set our own + if strings.HasPrefix(lower, "access-control-") { + continue + } + for _, v := range values { + c.Response().Header().Add(key, v) + } + } + + // Set CORS headers to allow browser access + c.Response().Header().Set("Access-Control-Allow-Origin", "*") + 
c.Response().Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") + c.Response().Header().Set("Access-Control-Allow-Headers", "*") + c.Response().Header().Set("Access-Control-Expose-Headers", "*") + + c.Response().WriteHeader(resp.StatusCode) + + // Stream the response body + _, err = io.Copy(c.Response().Writer, resp.Body) + return err + } +} + +// CORSProxyOptionsEndpoint handles CORS preflight requests for the proxy. +func CORSProxyOptionsEndpoint() echo.HandlerFunc { + return func(c echo.Context) error { + c.Response().Header().Set("Access-Control-Allow-Origin", "*") + c.Response().Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") + c.Response().Header().Set("Access-Control-Allow-Headers", "*") + c.Response().Header().Set("Access-Control-Max-Age", "86400") + return c.NoContent(http.StatusNoContent) + } +} diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go index c7c44b67d..0ff75f4a9 100644 --- a/core/http/endpoints/localai/mcp.go +++ b/core/http/endpoints/localai/mcp.go @@ -1,26 +1,19 @@ package localai import ( - "context" - "encoding/json" - "errors" "fmt" - "net" - "time" + "strings" "github.com/labstack/echo/v4" "github.com/mudler/LocalAI/core/config" - mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" + "github.com/mudler/LocalAI/core/http/endpoints/openai" "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" - "github.com/mudler/cogito" - "github.com/mudler/cogito/clients" - "github.com/mudler/xlog" ) -// MCP SSE Event Types +// MCP SSE Event Types (kept for backward compatibility with MCP endpoint consumers) type MCPReasoningEvent struct { Type string `json:"type"` Content string `json:"content"` @@ -54,262 +47,53 @@ type MCPErrorEvent struct { Message string `json:"message"` } -// MCPEndpoint is the endpoint for MCP chat completions. 
Supports SSE mode, but it is not compatible with the OpenAI apis. -// @Summary Stream MCP chat completions with reasoning, tool calls, and results +// MCPEndpoint is the endpoint for MCP chat completions. +// It enables all MCP servers for the model and delegates to the standard chat endpoint, +// which handles MCP tool injection and server-side execution. +// Both streaming and non-streaming modes use standard OpenAI response format. +// @Summary MCP chat completions with automatic tool execution // @Param request body schema.OpenAIRequest true "query params" // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/mcp/chat/completions [post] func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc { + chatHandler := openai.ChatEndpoint(cl, ml, evaluator, appConfig) + return func(c echo.Context) error { - ctx := c.Request().Context() - created := int(time.Now().Unix()) - - // Handle Correlation - id := c.Request().Header.Get("X-Correlation-ID") - if id == "" { - id = fmt.Sprintf("mcp-%d", time.Now().UnixNano()) - } - input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest) if !ok || input.Model == "" { return echo.ErrBadRequest } - config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig) - if !ok || config == nil { + modelConfig, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig) + if !ok || modelConfig == nil { return echo.ErrBadRequest } - if config.MCP.Servers == "" && config.MCP.Stdio == "" { + if modelConfig.MCP.Servers == "" && modelConfig.MCP.Stdio == "" { return fmt.Errorf("no MCP servers configured") } - // Get MCP config from model config - remote, stdio, err := config.MCP.MCPConfigFromYAML() - if err != nil { - return fmt.Errorf("failed to get MCP config: %w", err) + // Enable all MCP servers if none explicitly specified (preserve original behavior) + 
if input.Metadata == nil { + input.Metadata = map[string]string{} } - - // Check if we have tools in cache, or we have to have an initial connection - sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio) - if err != nil { - return fmt.Errorf("failed to get MCP sessions: %w", err) - } - - if len(sessions) == 0 { - return fmt.Errorf("no working MCP servers found") - } - - // Build fragment from messages - fragment := cogito.NewEmptyFragment() - for _, message := range input.Messages { - fragment = fragment.AddMessage(cogito.MessageRole(message.Role), message.StringContent) - } - - _, port, err := net.SplitHostPort(appConfig.APIAddress) - if err != nil { - return err - } - apiKey := "" - if len(appConfig.ApiKeys) > 0 { - apiKey = appConfig.ApiKeys[0] - } - - ctxWithCancellation, cancel := context.WithCancel(ctx) - defer cancel() - - // TODO: instead of connecting to the API, we should just wire this internally - // and act like completion.go. - // We can do this as cogito expects an interface and we can create one that - // we satisfy to just call internally ComputeChoices - defaultLLM := clients.NewLocalAILLM(config.Name, apiKey, "http://127.0.0.1:"+port) - - // Build cogito options using the consolidated method - cogitoOpts := config.BuildCogitoOptions() - cogitoOpts = append( - cogitoOpts, - cogito.WithContext(ctxWithCancellation), - cogito.WithMCPs(sessions...), - ) - // Check if streaming is requested - toStream := input.Stream - - if !toStream { - // Non-streaming mode: execute synchronously and return JSON response - cogitoOpts = append( - cogitoOpts, - cogito.WithStatusCallback(func(s string) { - xlog.Debug("[model agent] Status", "model", config.Name, "status", s) - }), - cogito.WithReasoningCallback(func(s string) { - xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s) - }), - cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - xlog.Debug("[model agent] Tool 
call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments) - return cogito.ToolCallDecision{ - Approved: true, - } - }), - cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) { - xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments) - }), - ) - - f, err := cogito.ExecuteTools( - defaultLLM, fragment, - cogitoOpts..., - ) - if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - return err + if _, hasMCP := input.Metadata["mcp_servers"]; !hasMCP { + remote, stdio, err := modelConfig.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to get MCP config: %w", err) } - - resp := &schema.OpenAIResponse{ - ID: id, - Created: created, - Model: input.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}}, - Object: "chat.completion", + var allServers []string + for name := range remote.Servers { + allServers = append(allServers, name) } - - jsonResult, _ := json.Marshal(resp) - xlog.Debug("Response", "response", string(jsonResult)) - - // Return the prediction in the response body - return c.JSON(200, resp) + for name := range stdio.Servers { + allServers = append(allServers, name) + } + input.Metadata["mcp_servers"] = strings.Join(allServers, ",") } - // Streaming mode: use SSE - // Set up SSE headers - c.Response().Header().Set("Content-Type", "text/event-stream") - c.Response().Header().Set("Cache-Control", "no-cache") - c.Response().Header().Set("Connection", "keep-alive") - c.Response().Header().Set("X-Correlation-ID", id) - - // Create channel for streaming events - events := make(chan interface{}) - ended := make(chan error, 1) - - // Set up callbacks for streaming - statusCallback := func(s string) { - events <- MCPStatusEvent{ - Type: "status", - Message: s, - } - } - - reasoningCallback 
:= func(s string) { - events <- MCPReasoningEvent{ - Type: "reasoning", - Content: s, - } - } - - toolCallCallback := func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - events <- MCPToolCallEvent{ - Type: "tool_call", - Name: t.Name, - Arguments: t.Arguments, - Reasoning: t.Reasoning, - } - return cogito.ToolCallDecision{ - Approved: true, - } - } - - toolCallResultCallback := func(t cogito.ToolStatus) { - events <- MCPToolResultEvent{ - Type: "tool_result", - Name: t.Name, - Result: t.Result, - } - } - - cogitoOpts = append(cogitoOpts, - cogito.WithStatusCallback(statusCallback), - cogito.WithReasoningCallback(reasoningCallback), - cogito.WithToolCallBack(toolCallCallback), - cogito.WithToolCallResultCallback(toolCallResultCallback), - ) - - // Execute tools in a goroutine - go func() { - defer close(events) - - f, err := cogito.ExecuteTools( - defaultLLM, fragment, - cogitoOpts..., - ) - if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - events <- MCPErrorEvent{ - Type: "error", - Message: fmt.Sprintf("Failed to execute tools: %v", err), - } - ended <- err - return - } - - // Stream final assistant response - content := f.LastMessage().Content - events <- MCPAssistantEvent{ - Type: "assistant", - Content: content, - } - - ended <- nil - }() - - // Stream events to client - LOOP: - for { - select { - case <-ctx.Done(): - // Context was cancelled (client disconnected or request cancelled) - xlog.Debug("Request context cancelled, stopping stream") - cancel() - break LOOP - case event := <-events: - if event == nil { - // Channel closed - break LOOP - } - eventData, err := json.Marshal(event) - if err != nil { - xlog.Debug("Failed to marshal event", "error", err) - continue - } - xlog.Debug("Sending event", "event", string(eventData)) - _, err = fmt.Fprintf(c.Response().Writer, "data: %s\n\n", string(eventData)) - if err != nil { - xlog.Debug("Sending event failed", "error", err) - cancel() - return err - } - 
c.Response().Flush() - case err := <-ended: - if err == nil { - // Send done signal - fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n") - c.Response().Flush() - break LOOP - } - xlog.Error("Stream ended with error", "error", err) - errorEvent := MCPErrorEvent{ - Type: "error", - Message: err.Error(), - } - errorData, marshalErr := json.Marshal(errorEvent) - if marshalErr != nil { - fmt.Fprintf(c.Response().Writer, "data: {\"type\":\"error\",\"message\":\"Internal error\"}\n\n") - } else { - fmt.Fprintf(c.Response().Writer, "data: %s\n\n", string(errorData)) - } - fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n") - c.Response().Flush() - return nil - } - } - - xlog.Debug("Stream ended") - return nil + // Delegate to the standard chat endpoint which handles MCP tool + // injection and server-side execution for both streaming and non-streaming. + return chatHandler(c) } } diff --git a/core/http/endpoints/localai/mcp_prompts.go b/core/http/endpoints/localai/mcp_prompts.go new file mode 100644 index 000000000..8f04ee6c8 --- /dev/null +++ b/core/http/endpoints/localai/mcp_prompts.go @@ -0,0 +1,141 @@ +package localai + +import ( + "fmt" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" +) + +// MCPPromptsEndpoint returns the list of MCP prompts for a given model. 
+// GET /v1/mcp/prompts/:model +func MCPPromptsEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("model") + if modelName == "" { + return echo.ErrBadRequest + } + + cfg, exists := cl.GetModelConfig(modelName) + if !exists { + return fmt.Errorf("model %q not found", modelName) + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return c.JSON(200, []any{}) + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + prompts, err := mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to discover MCP prompts: %w", err) + } + + type promptArgJSON struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Required bool `json:"required,omitempty"` + } + type promptJSON struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Title string `json:"title,omitempty"` + Arguments []promptArgJSON `json:"arguments,omitempty"` + Server string `json:"server"` + } + + var result []promptJSON + for _, p := range prompts { + pj := promptJSON{ + Name: p.PromptName, + Description: p.Description, + Title: p.Title, + Server: p.ServerName, + } + for _, arg := range p.Arguments { + pj.Arguments = append(pj.Arguments, promptArgJSON{ + Name: arg.Name, + Description: arg.Description, + Required: arg.Required, + }) + } + result = append(result, pj) + } + + return c.JSON(200, result) + } +} + +// MCPGetPromptEndpoint expands a prompt by name with the given arguments. 
+// POST /v1/mcp/prompts/:model/:prompt +func MCPGetPromptEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("model") + promptName := c.Param("prompt") + if modelName == "" || promptName == "" { + return echo.ErrBadRequest + } + + cfg, exists := cl.GetModelConfig(modelName) + if !exists { + return fmt.Errorf("model %q not found", modelName) + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return fmt.Errorf("no MCP servers configured for model %q", modelName) + } + + var req struct { + Arguments map[string]string `json:"arguments"` + } + if err := c.Bind(&req); err != nil { + return echo.ErrBadRequest + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + prompts, err := mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to discover MCP prompts: %w", err) + } + + messages, err := mcpTools.GetMCPPrompt(c.Request().Context(), prompts, promptName, req.Arguments) + if err != nil { + return fmt.Errorf("failed to get prompt: %w", err) + } + + type messageJSON struct { + Role string `json:"role"` + Content string `json:"content"` + } + var result []messageJSON + for _, m := range messages { + result = append(result, messageJSON{ + Role: string(m.Role), + Content: mcpTools.PromptMessageToText(m), + }) + } + + return c.JSON(200, map[string]any{ + "messages": result, + }) + } +} diff --git a/core/http/endpoints/localai/mcp_resources.go b/core/http/endpoints/localai/mcp_resources.go new file mode 100644 index 000000000..0cacec3c1 --- /dev/null +++ b/core/http/endpoints/localai/mcp_resources.go @@ -0,0 +1,127 @@ +package localai + +import ( + "fmt" 
+ + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" +) + +// MCPResourcesEndpoint returns the list of MCP resources for a given model. +// GET /v1/mcp/resources/:model +func MCPResourcesEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("model") + if modelName == "" { + return echo.ErrBadRequest + } + + cfg, exists := cl.GetModelConfig(modelName) + if !exists { + return fmt.Errorf("model %q not found", modelName) + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return c.JSON(200, []any{}) + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + resources, err := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to discover MCP resources: %w", err) + } + + type resourceJSON struct { + Name string `json:"name"` + URI string `json:"uri"` + Description string `json:"description,omitempty"` + MIMEType string `json:"mimeType,omitempty"` + Server string `json:"server"` + } + + var result []resourceJSON + for _, r := range resources { + result = append(result, resourceJSON{ + Name: r.Name, + URI: r.URI, + Description: r.Description, + MIMEType: r.MIMEType, + Server: r.ServerName, + }) + } + + return c.JSON(200, result) + } +} + +// MCPReadResourceEndpoint reads a specific MCP resource by URI. 
+// POST /v1/mcp/resources/:model/read +func MCPReadResourceEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("model") + if modelName == "" { + return echo.ErrBadRequest + } + + cfg, exists := cl.GetModelConfig(modelName) + if !exists { + return fmt.Errorf("model %q not found", modelName) + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return fmt.Errorf("no MCP servers configured for model %q", modelName) + } + + var req struct { + URI string `json:"uri"` + } + if err := c.Bind(&req); err != nil || req.URI == "" { + return echo.ErrBadRequest + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + resources, err := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to discover MCP resources: %w", err) + } + + content, err := mcpTools.ReadMCPResource(c.Request().Context(), resources, req.URI) + if err != nil { + return fmt.Errorf("failed to read resource: %w", err) + } + + // Find the resource info for mimeType + mimeType := "" + for _, r := range resources { + if r.URI == req.URI { + mimeType = r.MIMEType + break + } + } + + return c.JSON(200, map[string]any{ + "uri": req.URI, + "content": content, + "mimeType": mimeType, + }) + } +} diff --git a/core/http/endpoints/localai/mcp_tools.go b/core/http/endpoints/localai/mcp_tools.go new file mode 100644 index 000000000..0ec43529b --- /dev/null +++ b/core/http/endpoints/localai/mcp_tools.go @@ -0,0 +1,91 @@ +package localai + +import ( + "fmt" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp" + 
"github.com/mudler/LocalAI/core/http/middleware" +) + +// MCPServersEndpoint returns the list of MCP servers and their tools for a given model. +// GET /v1/mcp/servers/:model +func MCPServersEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc { + return func(c echo.Context) error { + modelName := c.Param("model") + if modelName == "" { + return echo.ErrBadRequest + } + + cfg, exists := cl.GetModelConfig(modelName) + if !exists { + return fmt.Errorf("model %q not found", modelName) + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return c.JSON(200, map[string]any{ + "model": modelName, + "servers": []any{}, + }) + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + servers, err := mcpTools.ListMCPServers(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to list MCP servers: %w", err) + } + + return c.JSON(200, map[string]any{ + "model": modelName, + "servers": servers, + }) + } +} + +// MCPServersEndpointFromMiddleware is a version that uses the middleware-resolved model config. +// This allows it to use the same middleware chain as other endpoints. 
+func MCPServersEndpointFromMiddleware() echo.HandlerFunc { + return func(c echo.Context) error { + cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig) + if !ok || cfg == nil { + return echo.ErrBadRequest + } + + if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { + return c.JSON(200, map[string]any{ + "model": cfg.Name, + "servers": []any{}, + }) + } + + remote, stdio, err := cfg.MCP.MCPConfigFromYAML() + if err != nil { + return fmt.Errorf("failed to parse MCP config: %w", err) + } + + namedSessions, err := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if err != nil { + return fmt.Errorf("failed to get MCP sessions: %w", err) + } + + servers, err := mcpTools.ListMCPServers(c.Request().Context(), namedSessions) + if err != nil { + return fmt.Errorf("failed to list MCP servers: %w", err) + } + + return c.JSON(200, map[string]any{ + "model": cfg.Name, + "servers": servers, + }) + } +} diff --git a/core/http/endpoints/mcp/tools.go b/core/http/endpoints/mcp/tools.go index 7954e85b6..fde990f01 100644 --- a/core/http/endpoints/mcp/tools.go +++ b/core/http/endpoints/mcp/tools.go @@ -2,32 +2,109 @@ package mcp import ( "context" + "encoding/json" + "fmt" "net/http" "os" "os/exec" + "strings" "sync" "time" "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/pkg/functions" "github.com/mudler/LocalAI/pkg/signals" "github.com/modelcontextprotocol/go-sdk/mcp" "github.com/mudler/xlog" ) +// NamedSession pairs an MCP session with its server name and type. +type NamedSession struct { + Name string + Type string // "remote" or "stdio" + Session *mcp.ClientSession +} + +// MCPToolInfo holds a discovered MCP tool along with its origin session. +type MCPToolInfo struct { + ServerName string + ToolName string + Function functions.Function + Session *mcp.ClientSession +} + +// MCPServerInfo describes an MCP server and its available tools, prompts, and resources. 
+type MCPServerInfo struct { + Name string `json:"name"` + Type string `json:"type"` + Tools []string `json:"tools"` + Prompts []string `json:"prompts,omitempty"` + Resources []string `json:"resources,omitempty"` +} + +// MCPPromptInfo holds a discovered MCP prompt along with its origin session. +type MCPPromptInfo struct { + ServerName string + PromptName string + Description string + Title string + Arguments []*mcp.PromptArgument + Session *mcp.ClientSession +} + +// MCPResourceInfo holds a discovered MCP resource along with its origin session. +type MCPResourceInfo struct { + ServerName string + Name string + URI string + Description string + MIMEType string + Session *mcp.ClientSession +} + type sessionCache struct { - mu sync.Mutex - cache map[string][]*mcp.ClientSession + mu sync.Mutex + cache map[string][]*mcp.ClientSession + cancels map[string]context.CancelFunc +} + +type namedSessionCache struct { + mu sync.Mutex + cache map[string][]NamedSession + cancels map[string]context.CancelFunc } var ( cache = sessionCache{ - cache: make(map[string][]*mcp.ClientSession), + cache: make(map[string][]*mcp.ClientSession), + cancels: make(map[string]context.CancelFunc), + } + + namedCache = namedSessionCache{ + cache: make(map[string][]NamedSession), + cancels: make(map[string]context.CancelFunc), } client = mcp.NewClient(&mcp.Implementation{Name: "LocalAI", Version: "v1.0.0"}, nil) ) +// MCPServersFromMetadata extracts the MCP server list from the metadata map +// and returns the list. The "mcp_servers" key is consumed (deleted from the map) +// so it doesn't leak to the backend. 
+func MCPServersFromMetadata(metadata map[string]string) []string { + raw, ok := metadata["mcp_servers"] + if !ok || raw == "" { + return nil + } + delete(metadata, "mcp_servers") + servers := strings.Split(raw, ",") + for i := range servers { + servers[i] = strings.TrimSpace(servers[i]) + } + return servers +} + func SessionsFromMCPConfig( name string, remote config.MCPGenericConfig[config.MCPRemoteServers], @@ -83,16 +160,461 @@ func SessionsFromMCPConfig( allSessions = append(allSessions, mcpSession) } - signals.RegisterGracefulTerminationHandler(func() { - for _, session := range allSessions { - session.Close() - } - cancel() - }) + cache.cancels[name] = cancel return allSessions, nil } +// NamedSessionsFromMCPConfig returns sessions with their server names preserved. +// If enabledServers is non-empty, only servers with matching names are returned. +func NamedSessionsFromMCPConfig( + name string, + remote config.MCPGenericConfig[config.MCPRemoteServers], + stdio config.MCPGenericConfig[config.MCPSTDIOServers], + enabledServers []string, +) ([]NamedSession, error) { + namedCache.mu.Lock() + defer namedCache.mu.Unlock() + + allSessions, exists := namedCache.cache[name] + if !exists { + ctx, cancel := context.WithCancel(context.Background()) + + for serverName, server := range remote.Servers { + xlog.Debug("[MCP remote server] Configuration", "name", serverName, "server", server) + httpClient := &http.Client{ + Timeout: 360 * time.Second, + Transport: newBearerTokenRoundTripper(server.Token, http.DefaultTransport), + } + + transport := &mcp.StreamableClientTransport{Endpoint: server.URL, HTTPClient: httpClient} + mcpSession, err := client.Connect(ctx, transport, nil) + if err != nil { + xlog.Error("Failed to connect to MCP server", "error", err, "name", serverName, "url", server.URL) + continue + } + xlog.Debug("[MCP remote server] Connected", "name", serverName, "url", server.URL) + allSessions = append(allSessions, NamedSession{ + Name: serverName, + Type: 
"remote", + Session: mcpSession, + }) + } + + for serverName, server := range stdio.Servers { + xlog.Debug("[MCP stdio server] Configuration", "name", serverName, "server", server) + command := exec.Command(server.Command, server.Args...) + command.Env = os.Environ() + for key, value := range server.Env { + command.Env = append(command.Env, key+"="+value) + } + transport := &mcp.CommandTransport{Command: command} + mcpSession, err := client.Connect(ctx, transport, nil) + if err != nil { + xlog.Error("Failed to start MCP server", "error", err, "name", serverName, "command", command) + continue + } + xlog.Debug("[MCP stdio server] Connected", "name", serverName, "command", command) + allSessions = append(allSessions, NamedSession{ + Name: serverName, + Type: "stdio", + Session: mcpSession, + }) + } + + namedCache.cache[name] = allSessions + namedCache.cancels[name] = cancel + } + + if len(enabledServers) == 0 { + return allSessions, nil + } + + enabled := make(map[string]bool, len(enabledServers)) + for _, s := range enabledServers { + enabled[s] = true + } + var filtered []NamedSession + for _, ns := range allSessions { + if enabled[ns.Name] { + filtered = append(filtered, ns) + } + } + return filtered, nil +} + +// DiscoverMCPTools queries each session for its tools and converts them to functions.Function. +// Deduplicates by tool name (first server wins). 
+func DiscoverMCPTools(ctx context.Context, sessions []NamedSession) ([]MCPToolInfo, error) { + seen := make(map[string]bool) + var result []MCPToolInfo + + for _, ns := range sessions { + toolsResult, err := ns.Session.ListTools(ctx, nil) + if err != nil { + xlog.Error("Failed to list tools from MCP server", "error", err, "server", ns.Name) + continue + } + for _, tool := range toolsResult.Tools { + if seen[tool.Name] { + continue + } + seen[tool.Name] = true + + f := functions.Function{ + Name: tool.Name, + Description: tool.Description, + } + + // Convert InputSchema to map[string]interface{} for functions.Function + if tool.InputSchema != nil { + schemaBytes, err := json.Marshal(tool.InputSchema) + if err == nil { + var params map[string]interface{} + if json.Unmarshal(schemaBytes, ¶ms) == nil { + f.Parameters = params + } + } + } + if f.Parameters == nil { + f.Parameters = map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{}, + } + } + + result = append(result, MCPToolInfo{ + ServerName: ns.Name, + ToolName: tool.Name, + Function: f, + Session: ns.Session, + }) + } + } + return result, nil +} + +// ExecuteMCPToolCall finds the matching tool and executes it. 
+func ExecuteMCPToolCall(ctx context.Context, tools []MCPToolInfo, toolName string, arguments string) (string, error) { + var toolInfo *MCPToolInfo + for i := range tools { + if tools[i].ToolName == toolName { + toolInfo = &tools[i] + break + } + } + if toolInfo == nil { + return "", fmt.Errorf("MCP tool %q not found", toolName) + } + + var args map[string]any + if arguments != "" { + if err := json.Unmarshal([]byte(arguments), &args); err != nil { + return "", fmt.Errorf("failed to parse arguments for tool %q: %w", toolName, err) + } + } + + result, err := toolInfo.Session.CallTool(ctx, &mcp.CallToolParams{ + Name: toolName, + Arguments: args, + }) + if err != nil { + return "", fmt.Errorf("MCP tool %q call failed: %w", toolName, err) + } + + // Extract text content from result + var texts []string + for _, content := range result.Content { + if tc, ok := content.(*mcp.TextContent); ok { + texts = append(texts, tc.Text) + } + } + if len(texts) == 0 { + // Fallback: marshal the whole result + data, _ := json.Marshal(result.Content) + return string(data), nil + } + if len(texts) == 1 { + return texts[0], nil + } + combined, _ := json.Marshal(texts) + return string(combined), nil +} + +// ListMCPServers returns server info with tool, prompt, and resource names for each session. 
+func ListMCPServers(ctx context.Context, sessions []NamedSession) ([]MCPServerInfo, error) { + var result []MCPServerInfo + for _, ns := range sessions { + info := MCPServerInfo{ + Name: ns.Name, + Type: ns.Type, + } + toolsResult, err := ns.Session.ListTools(ctx, nil) + if err != nil { + xlog.Error("Failed to list tools from MCP server", "error", err, "server", ns.Name) + } else { + for _, tool := range toolsResult.Tools { + info.Tools = append(info.Tools, tool.Name) + } + } + + promptsResult, err := ns.Session.ListPrompts(ctx, nil) + if err != nil { + xlog.Debug("Failed to list prompts from MCP server", "error", err, "server", ns.Name) + } else { + for _, p := range promptsResult.Prompts { + info.Prompts = append(info.Prompts, p.Name) + } + } + + resourcesResult, err := ns.Session.ListResources(ctx, nil) + if err != nil { + xlog.Debug("Failed to list resources from MCP server", "error", err, "server", ns.Name) + } else { + for _, r := range resourcesResult.Resources { + info.Resources = append(info.Resources, r.URI) + } + } + + result = append(result, info) + } + return result, nil +} + +// IsMCPTool checks if a tool name is in the MCP tool list. +func IsMCPTool(tools []MCPToolInfo, name string) bool { + for _, t := range tools { + if t.ToolName == name { + return true + } + } + return false +} + +// DiscoverMCPPrompts queries each session for its prompts. +// Deduplicates by prompt name (first server wins). 
+func DiscoverMCPPrompts(ctx context.Context, sessions []NamedSession) ([]MCPPromptInfo, error) { + seen := make(map[string]bool) + var result []MCPPromptInfo + + for _, ns := range sessions { + promptsResult, err := ns.Session.ListPrompts(ctx, nil) + if err != nil { + xlog.Error("Failed to list prompts from MCP server", "error", err, "server", ns.Name) + continue + } + for _, p := range promptsResult.Prompts { + if seen[p.Name] { + continue + } + seen[p.Name] = true + result = append(result, MCPPromptInfo{ + ServerName: ns.Name, + PromptName: p.Name, + Description: p.Description, + Title: p.Title, + Arguments: p.Arguments, + Session: ns.Session, + }) + } + } + return result, nil +} + +// GetMCPPrompt finds and expands a prompt by name using the discovered prompts list. +func GetMCPPrompt(ctx context.Context, prompts []MCPPromptInfo, name string, args map[string]string) ([]*mcp.PromptMessage, error) { + var info *MCPPromptInfo + for i := range prompts { + if prompts[i].PromptName == name { + info = &prompts[i] + break + } + } + if info == nil { + return nil, fmt.Errorf("MCP prompt %q not found", name) + } + + result, err := info.Session.GetPrompt(ctx, &mcp.GetPromptParams{ + Name: name, + Arguments: args, + }) + if err != nil { + return nil, fmt.Errorf("MCP prompt %q get failed: %w", name, err) + } + return result.Messages, nil +} + +// DiscoverMCPResources queries each session for its resources. +// Deduplicates by URI (first server wins). 
+func DiscoverMCPResources(ctx context.Context, sessions []NamedSession) ([]MCPResourceInfo, error) { + seen := make(map[string]bool) + var result []MCPResourceInfo + + for _, ns := range sessions { + resourcesResult, err := ns.Session.ListResources(ctx, nil) + if err != nil { + xlog.Error("Failed to list resources from MCP server", "error", err, "server", ns.Name) + continue + } + for _, r := range resourcesResult.Resources { + if seen[r.URI] { + continue + } + seen[r.URI] = true + result = append(result, MCPResourceInfo{ + ServerName: ns.Name, + Name: r.Name, + URI: r.URI, + Description: r.Description, + MIMEType: r.MIMEType, + Session: ns.Session, + }) + } + } + return result, nil +} + +// ReadMCPResource reads a resource by URI from the matching session. +func ReadMCPResource(ctx context.Context, resources []MCPResourceInfo, uri string) (string, error) { + var info *MCPResourceInfo + for i := range resources { + if resources[i].URI == uri { + info = &resources[i] + break + } + } + if info == nil { + return "", fmt.Errorf("MCP resource %q not found", uri) + } + + result, err := info.Session.ReadResource(ctx, &mcp.ReadResourceParams{URI: uri}) + if err != nil { + return "", fmt.Errorf("MCP resource %q read failed: %w", uri, err) + } + + var texts []string + for _, c := range result.Contents { + if c.Text != "" { + texts = append(texts, c.Text) + } + } + return strings.Join(texts, "\n"), nil +} + +// MCPPromptFromMetadata extracts the prompt name and arguments from metadata. +// The "mcp_prompt" and "mcp_prompt_args" keys are consumed (deleted from the map). 
+func MCPPromptFromMetadata(metadata map[string]string) (string, map[string]string) { + name, ok := metadata["mcp_prompt"] + if !ok || name == "" { + return "", nil + } + delete(metadata, "mcp_prompt") + + var args map[string]string + if raw, ok := metadata["mcp_prompt_args"]; ok && raw != "" { + json.Unmarshal([]byte(raw), &args) + delete(metadata, "mcp_prompt_args") + } + return name, args +} + +// MCPResourcesFromMetadata extracts resource URIs from metadata. +// The "mcp_resources" key is consumed (deleted from the map). +func MCPResourcesFromMetadata(metadata map[string]string) []string { + raw, ok := metadata["mcp_resources"] + if !ok || raw == "" { + return nil + } + delete(metadata, "mcp_resources") + uris := strings.Split(raw, ",") + for i := range uris { + uris[i] = strings.TrimSpace(uris[i]) + } + return uris +} + +// PromptMessageToText extracts text from a PromptMessage's Content. +func PromptMessageToText(msg *mcp.PromptMessage) string { + if tc, ok := msg.Content.(*mcp.TextContent); ok { + return tc.Text + } + // Fallback: marshal content + data, _ := json.Marshal(msg.Content) + return string(data) +} + +// CloseMCPSessions closes all MCP sessions for a given model and removes them from the cache. +// This should be called when a model is unloaded or shut down. 
+func CloseMCPSessions(modelName string) {
+	// Close sessions in the unnamed cache
+	cache.mu.Lock()
+	if sessions, ok := cache.cache[modelName]; ok {
+		for _, s := range sessions {
+			s.Close()
+		}
+		delete(cache.cache, modelName)
+	}
+	// Cancel the context stored for this model's sessions, then drop the entry.
+	if cancel, ok := cache.cancels[modelName]; ok {
+		cancel()
+		delete(cache.cancels, modelName)
+	}
+	cache.mu.Unlock()
+
+	// Close sessions in the named cache
+	namedCache.mu.Lock()
+	if sessions, ok := namedCache.cache[modelName]; ok {
+		for _, ns := range sessions {
+			ns.Session.Close()
+		}
+		delete(namedCache.cache, modelName)
+	}
+	if cancel, ok := namedCache.cancels[modelName]; ok {
+		cancel()
+		delete(namedCache.cancels, modelName)
+	}
+	namedCache.mu.Unlock()
+
+	xlog.Debug("Closed MCP sessions for model", "model", modelName)
+}
+
+// CloseAllMCPSessions closes all cached MCP sessions across all models.
+// This should be called during graceful shutdown.
+func CloseAllMCPSessions() {
+	cache.mu.Lock()
+	for name, sessions := range cache.cache {
+		for _, s := range sessions {
+			s.Close()
+		}
+		if cancel, ok := cache.cancels[name]; ok {
+			cancel()
+		}
+	}
+	// Reset both maps wholesale rather than deleting entry by entry.
+	cache.cache = make(map[string][]*mcp.ClientSession)
+	cache.cancels = make(map[string]context.CancelFunc)
+	cache.mu.Unlock()
+
+	namedCache.mu.Lock()
+	for name, sessions := range namedCache.cache {
+		for _, ns := range sessions {
+			ns.Session.Close()
+		}
+		if cancel, ok := namedCache.cancels[name]; ok {
+			cancel()
+		}
+	}
+	namedCache.cache = make(map[string][]NamedSession)
+	namedCache.cancels = make(map[string]context.CancelFunc)
+	namedCache.mu.Unlock()
+
+	xlog.Debug("Closed all MCP sessions")
+}
+
+// Register CloseAllMCPSessions as a graceful-termination hook at package load.
+func init() {
+	signals.RegisterGracefulTerminationHandler(func() {
+		CloseAllMCPSessions()
+	})
+}
+
 // bearerTokenRoundTripper is a custom roundtripper that injects a bearer token
 // into HTTP requests
 type bearerTokenRoundTripper struct {
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 238d65026..cf2f05663 100644
---
a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -10,6 +10,7 @@ import (
 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
+	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
@@ -22,6 +23,37 @@ import (
 	"github.com/mudler/xlog"
 )

// mergeToolCallDeltas merges streaming tool call deltas into complete tool calls.
// In SSE streaming, a single tool call arrives as multiple chunks sharing the same Index:
// the first chunk carries the ID, Type, and Name; subsequent chunks append to Arguments.
func mergeToolCallDeltas(existing []schema.ToolCall, deltas []schema.ToolCall) []schema.ToolCall {
	positions := make(map[int]int, len(existing)) // tool call Index -> position in slice
	for i, call := range existing {
		positions[call.Index] = i
	}
	for _, delta := range deltas {
		pos, ok := positions[delta.Index]
		if !ok {
			// First chunk for this Index: record it as a brand-new call.
			positions[delta.Index] = len(existing)
			existing = append(existing, delta)
			continue
		}
		// Later chunk: fold its fields into the call accumulated so far.
		target := &existing[pos]
		if delta.ID != "" {
			target.ID = delta.ID
		}
		if delta.Type != "" {
			target.Type = delta.Type
		}
		if delta.FunctionCall.Name != "" {
			target.FunctionCall.Name = delta.FunctionCall.Name
		}
		target.FunctionCall.Arguments += delta.FunctionCall.Arguments
	}
	return existing
}

 // ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
 // @Summary Generate a chat completions for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params" @@ -405,6 +437,100 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() strictMode := false + // MCP tool injection: when mcp_servers is set in metadata and model has MCP config + var mcpToolInfos []mcpTools.MCPToolInfo + mcpServers := mcpTools.MCPServersFromMetadata(input.Metadata) + + // MCP prompt and resource injection (extracted before tool injection) + mcpPromptName, mcpPromptArgs := mcpTools.MCPPromptFromMetadata(input.Metadata) + mcpResourceURIs := mcpTools.MCPResourcesFromMetadata(input.Metadata) + + if (len(mcpServers) > 0 || mcpPromptName != "" || len(mcpResourceURIs) > 0) && (config.MCP.Servers != "" || config.MCP.Stdio != "") { + remote, stdio, mcpErr := config.MCP.MCPConfigFromYAML() + if mcpErr == nil { + namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(config.Name, remote, stdio, mcpServers) + if sessErr == nil && len(namedSessions) > 0 { + // Prompt injection: prepend prompt messages to the conversation + if mcpPromptName != "" { + prompts, discErr := mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions) + if discErr == nil { + promptMsgs, getErr := mcpTools.GetMCPPrompt(c.Request().Context(), prompts, mcpPromptName, mcpPromptArgs) + if getErr == nil { + var injected []schema.Message + for _, pm := range promptMsgs { + injected = append(injected, schema.Message{ + Role: string(pm.Role), + Content: mcpTools.PromptMessageToText(pm), + }) + } + input.Messages = append(injected, input.Messages...) 
+ xlog.Debug("MCP prompt injected", "prompt", mcpPromptName, "messages", len(injected)) + } else { + xlog.Error("Failed to get MCP prompt", "error", getErr) + } + } else { + xlog.Error("Failed to discover MCP prompts", "error", discErr) + } + } + + // Resource injection: append resource content to the last user message + if len(mcpResourceURIs) > 0 { + resources, discErr := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions) + if discErr == nil { + var resourceTexts []string + for _, uri := range mcpResourceURIs { + content, readErr := mcpTools.ReadMCPResource(c.Request().Context(), resources, uri) + if readErr != nil { + xlog.Error("Failed to read MCP resource", "error", readErr, "uri", uri) + continue + } + // Find resource name + name := uri + for _, r := range resources { + if r.URI == uri { + name = r.Name + break + } + } + resourceTexts = append(resourceTexts, fmt.Sprintf("--- MCP Resource: %s ---\n%s", name, content)) + } + if len(resourceTexts) > 0 && len(input.Messages) > 0 { + lastIdx := len(input.Messages) - 1 + suffix := "\n\n" + strings.Join(resourceTexts, "\n\n") + switch ct := input.Messages[lastIdx].Content.(type) { + case string: + input.Messages[lastIdx].Content = ct + suffix + default: + input.Messages[lastIdx].Content = fmt.Sprintf("%v%s", ct, suffix) + } + xlog.Debug("MCP resources injected", "count", len(resourceTexts)) + } + } else { + xlog.Error("Failed to discover MCP resources", "error", discErr) + } + } + + // Tool injection + if len(mcpServers) > 0 { + discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions) + if discErr == nil { + mcpToolInfos = discovered + for _, ti := range mcpToolInfos { + funcs = append(funcs, ti.Function) + input.Tools = append(input.Tools, functions.Tool{Type: "function", Function: ti.Function}) + } + shouldUseFn = len(funcs) > 0 && config.ShouldUseFunctions() + xlog.Debug("MCP tools injected", "count", len(mcpToolInfos), "total_funcs", len(funcs)) + } else { + 
xlog.Error("Failed to discover MCP tools", "error", discErr) + } + } + } + } else { + xlog.Error("Failed to parse MCP config", "error", mcpErr) + } + } + xlog.Debug("Tool call routing decision", "shouldUseFn", shouldUseFn, "len(input.Functions)", len(input.Functions), @@ -552,6 +678,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator c.Response().Header().Set("Connection", "keep-alive") c.Response().Header().Set("X-Correlation-ID", id) + mcpStreamMaxIterations := 10 + if config.Agent.MaxIterations > 0 { + mcpStreamMaxIterations = config.Agent.MaxIterations + } + hasMCPToolsStream := len(mcpToolInfos) > 0 + + for mcpStreamIter := 0; mcpStreamIter <= mcpStreamMaxIterations; mcpStreamIter++ { + // Re-template on MCP iterations + if mcpStreamIter > 0 && !config.TemplateConfig.UseTokenizerTemplate { + predInput = evaluator.TemplateMessages(*input, input.Messages, config, funcs, shouldUseFn) + xlog.Debug("MCP stream re-templating", "iteration", mcpStreamIter) + } + responses := make(chan schema.OpenAIResponse) ended := make(chan error, 1) @@ -565,6 +704,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator usage := &schema.OpenAIUsage{} toolsCalled := false + var collectedToolCalls []schema.ToolCall + var collectedContent string LOOP: for { @@ -582,6 +723,18 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it if len(ev.Choices[0].Delta.ToolCalls) > 0 { toolsCalled = true + // Collect and merge tool call deltas for MCP execution + if hasMCPToolsStream { + collectedToolCalls = mergeToolCallDeltas(collectedToolCalls, ev.Choices[0].Delta.ToolCalls) + } + } + // Collect content for MCP conversation history + if hasMCPToolsStream && ev.Choices[0].Delta != nil && ev.Choices[0].Delta.Content != nil { + if s, ok := ev.Choices[0].Delta.Content.(string); ok { + collectedContent += s 
+ } else if sp, ok := ev.Choices[0].Delta.Content.(*string); ok && sp != nil { + collectedContent += *sp + } } respData, err := json.Marshal(ev) if err != nil { @@ -632,6 +785,64 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } } + // MCP streaming tool execution: if we collected MCP tool calls, execute and loop + if hasMCPToolsStream && toolsCalled && len(collectedToolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range collectedToolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + // Append assistant message with tool_calls + assistantMsg := schema.Message{ + Role: "assistant", + Content: collectedContent, + ToolCalls: collectedToolCalls, + } + input.Messages = append(input.Messages, assistantMsg) + + // Execute MCP tool calls and stream results as tool_result events + for _, tc := range collectedToolCalls { + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + xlog.Debug("Executing MCP tool (stream)", "tool", tc.FunctionCall.Name, "iteration", mcpStreamIter) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + c.Request().Context(), mcpToolInfos, + tc.FunctionCall.Name, tc.FunctionCall.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + input.Messages = append(input.Messages, schema.Message{ + Role: "tool", + Content: toolResult, + StringContent: toolResult, + ToolCallID: tc.ID, + Name: tc.FunctionCall.Name, + }) + + // Stream tool result event to client + mcpEvent := map[string]any{ + "type": "mcp_tool_result", + "name": tc.FunctionCall.Name, + "result": toolResult, + } + if mcpEventData, err := json.Marshal(mcpEvent); err == nil { + fmt.Fprintf(c.Response().Writer, "data: %s\n\n", mcpEventData) + c.Response().Flush() + } + } + + xlog.Debug("MCP streaming tools executed, re-running 
inference", "iteration", mcpStreamIter) + continue // next MCP stream iteration + } + } + + // No MCP tools to execute, send final stop message finishReason := FinishReasonStop if toolsCalled && len(input.Tools) > 0 { finishReason = FinishReasonToolCalls @@ -659,9 +870,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator c.Response().Flush() xlog.Debug("Stream ended") return nil + } // end MCP stream iteration loop + + // Safety fallback + fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n") + c.Response().Flush() + return nil // no streaming mode default: + mcpMaxIterations := 10 + if config.Agent.MaxIterations > 0 { + mcpMaxIterations = config.Agent.MaxIterations + } + hasMCPTools := len(mcpToolInfos) > 0 + + for mcpIteration := 0; mcpIteration <= mcpMaxIterations; mcpIteration++ { + // Re-template on each MCP iteration since messages may have changed + if mcpIteration > 0 && !config.TemplateConfig.UseTokenizerTemplate { + predInput = evaluator.TemplateMessages(*input, input.Messages, config, funcs, shouldUseFn) + xlog.Debug("MCP re-templating", "iteration", mcpIteration, "prompt_len", len(predInput)) + } + // Detect if thinking token is already in prompt or template var template string if config.TemplateConfig.UseTokenizerTemplate { @@ -839,6 +1069,75 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator }) } + // MCP server-side tool execution loop: + // If we have MCP tools and the model returned tool_calls, execute MCP tools + // and re-run inference with the results appended to the conversation. 
+ if hasMCPTools && len(result) > 0 { + var mcpCallsExecuted bool + for _, choice := range result { + if choice.Message == nil || len(choice.Message.ToolCalls) == 0 { + continue + } + // Check if any tool calls are MCP tools + var hasMCPCalls bool + for _, tc := range choice.Message.ToolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + hasMCPCalls = true + break + } + } + if !hasMCPCalls { + continue + } + + // Append assistant message with tool_calls to conversation + assistantContent := "" + if choice.Message.Content != nil { + if s, ok := choice.Message.Content.(string); ok { + assistantContent = s + } else if sp, ok := choice.Message.Content.(*string); ok && sp != nil { + assistantContent = *sp + } + } + assistantMsg := schema.Message{ + Role: "assistant", + Content: assistantContent, + ToolCalls: choice.Message.ToolCalls, + } + input.Messages = append(input.Messages, assistantMsg) + + // Execute each MCP tool call and append results + for _, tc := range choice.Message.ToolCalls { + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + xlog.Debug("Executing MCP tool", "tool", tc.FunctionCall.Name, "arguments", tc.FunctionCall.Arguments, "iteration", mcpIteration) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + c.Request().Context(), mcpToolInfos, + tc.FunctionCall.Name, tc.FunctionCall.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + input.Messages = append(input.Messages, schema.Message{ + Role: "tool", + Content: toolResult, + StringContent: toolResult, + ToolCallID: tc.ID, + Name: tc.FunctionCall.Name, + }) + mcpCallsExecuted = true + } + } + + if mcpCallsExecuted { + xlog.Debug("MCP tools executed, re-running inference", "iteration", mcpIteration, "messages", len(input.Messages)) + continue // next MCP iteration + } + } + + // No MCP tools to execute (or no MCP tools 
configured), return response usage := schema.OpenAIUsage{ PromptTokens: tokenUsage.Prompt, CompletionTokens: tokenUsage.Completion, @@ -862,6 +1161,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator // Return the prediction in the response body return c.JSON(200, resp) + } // end MCP iteration loop + + // Should not reach here, but safety fallback + return fmt.Errorf("MCP iteration limit reached") } } } diff --git a/core/http/endpoints/openresponses/responses.go b/core/http/endpoints/openresponses/responses.go index cd193b67d..dd51e1a36 100644 --- a/core/http/endpoints/openresponses/responses.go +++ b/core/http/endpoints/openresponses/responses.go @@ -3,9 +3,7 @@ package openresponses import ( "context" "encoding/json" - "errors" "fmt" - "net" "strings" "time" @@ -21,8 +19,6 @@ import ( "github.com/mudler/LocalAI/pkg/model" reason "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/LocalAI/pkg/utils" - "github.com/mudler/cogito" - "github.com/mudler/cogito/clients" "github.com/mudler/xlog" ) @@ -104,14 +100,127 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval // Handle tools var funcs functions.Functions var shouldUseFn bool - var useMCP bool + var mcpToolInfos []mcpTools.MCPToolInfo if len(input.Tools) > 0 { - // User-provided tools funcs, shouldUseFn = convertORToolsToFunctions(input, cfg) - } else if cfg.MCP.Servers != "" || cfg.MCP.Stdio != "" { - // MCP tools (internal) - useMCP = true + } + + // MCP injection: prompts, resources, and tools + mcpServers := mcpTools.MCPServersFromMetadata(input.Metadata) + mcpPromptName, mcpPromptArgs := mcpTools.MCPPromptFromMetadata(input.Metadata) + mcpResourceURIs := mcpTools.MCPResourcesFromMetadata(input.Metadata) + + hasMCPRequest := len(mcpServers) > 0 || mcpPromptName != "" || len(mcpResourceURIs) > 0 + hasMCPConfig := cfg.MCP.Servers != "" || cfg.MCP.Stdio != "" + + if hasMCPRequest && hasMCPConfig { + remote, stdio, mcpErr := 
cfg.MCP.MCPConfigFromYAML() + if mcpErr == nil { + namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, mcpServers) + if sessErr == nil && len(namedSessions) > 0 { + // Prompt injection + if mcpPromptName != "" { + prompts, discErr := mcpTools.DiscoverMCPPrompts(c.Request().Context(), namedSessions) + if discErr == nil { + promptMsgs, getErr := mcpTools.GetMCPPrompt(c.Request().Context(), prompts, mcpPromptName, mcpPromptArgs) + if getErr == nil { + var injected []schema.Message + for _, pm := range promptMsgs { + injected = append(injected, schema.Message{ + Role: string(pm.Role), + Content: mcpTools.PromptMessageToText(pm), + }) + } + messages = append(injected, messages...) + xlog.Debug("Open Responses MCP prompt injected", "prompt", mcpPromptName, "messages", len(injected)) + } else { + xlog.Error("Failed to get MCP prompt", "error", getErr) + } + } + } + + // Resource injection + if len(mcpResourceURIs) > 0 { + resources, discErr := mcpTools.DiscoverMCPResources(c.Request().Context(), namedSessions) + if discErr == nil { + var resourceTexts []string + for _, uri := range mcpResourceURIs { + content, readErr := mcpTools.ReadMCPResource(c.Request().Context(), resources, uri) + if readErr != nil { + xlog.Error("Failed to read MCP resource", "error", readErr, "uri", uri) + continue + } + name := uri + for _, r := range resources { + if r.URI == uri { + name = r.Name + break + } + } + resourceTexts = append(resourceTexts, fmt.Sprintf("--- MCP Resource: %s ---\n%s", name, content)) + } + if len(resourceTexts) > 0 && len(messages) > 0 { + lastIdx := len(messages) - 1 + suffix := "\n\n" + strings.Join(resourceTexts, "\n\n") + switch ct := messages[lastIdx].Content.(type) { + case string: + messages[lastIdx].Content = ct + suffix + default: + messages[lastIdx].Content = fmt.Sprintf("%v%s", ct, suffix) + } + xlog.Debug("Open Responses MCP resources injected", "count", len(resourceTexts)) + } + } + } + + // Tool injection + if 
len(mcpServers) > 0 { + discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions) + if discErr == nil { + mcpToolInfos = discovered + for _, ti := range mcpToolInfos { + funcs = append(funcs, ti.Function) + input.Tools = append(input.Tools, schema.ORFunctionTool{ + Type: "function", + Name: ti.Function.Name, + Description: ti.Function.Description, + Parameters: ti.Function.Parameters, + }) + } + shouldUseFn = len(funcs) > 0 && cfg.ShouldUseFunctions() + xlog.Debug("Open Responses MCP tools injected", "count", len(mcpToolInfos), "total_funcs", len(funcs)) + } else { + xlog.Error("Failed to discover MCP tools", "error", discErr) + } + } + } + } else { + xlog.Error("Failed to parse MCP config", "error", mcpErr) + } + } else if len(input.Tools) == 0 && hasMCPConfig { + // Backward compat: model has MCP config, no user tools and no mcp_servers field + remote, stdio, mcpErr := cfg.MCP.MCPConfigFromYAML() + if mcpErr == nil { + namedSessions, sessErr := mcpTools.NamedSessionsFromMCPConfig(cfg.Name, remote, stdio, nil) + if sessErr == nil && len(namedSessions) > 0 { + discovered, discErr := mcpTools.DiscoverMCPTools(c.Request().Context(), namedSessions) + if discErr == nil { + mcpToolInfos = discovered + for _, ti := range mcpToolInfos { + funcs = append(funcs, ti.Function) + input.Tools = append(input.Tools, schema.ORFunctionTool{ + Type: "function", + Name: ti.Function.Name, + Description: ti.Function.Description, + Parameters: ti.Function.Parameters, + }) + } + shouldUseFn = len(funcs) > 0 && cfg.ShouldUseFunctions() + xlog.Debug("Open Responses MCP tools auto-activated", "count", len(mcpToolInfos)) + } + } + } } // Create OpenAI-compatible request for internal processing @@ -215,15 +324,12 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval var finalResponse *schema.ORResponseResource var bgErr error - if useMCP { - // Background MCP processing - finalResponse, bgErr = handleBackgroundMCPResponse(bgCtx, 
store, responseID, createdAt, input, cfg, ml, predInput, openAIReq, appConfig) - } else if input.Stream { + if input.Stream { // Background streaming processing (buffer events) - finalResponse, bgErr = handleBackgroundStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn) + finalResponse, bgErr = handleBackgroundStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator) } else { // Background non-streaming processing - finalResponse, bgErr = handleBackgroundNonStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn) + finalResponse, bgErr = handleBackgroundNonStream(bgCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, mcpToolInfos, evaluator) } if bgErr != nil { @@ -243,16 +349,11 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval return c.JSON(200, queuedResponse) } - if useMCP { - // Use MCP agentic loop - return handleMCPResponse(c, responseID, createdAt, input, cfg, ml, predInput, openAIReq, appConfig, shouldStore) - } - if input.Stream { - return handleOpenResponsesStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore) + return handleOpenResponsesStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator) } - return handleOpenResponsesNonStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore) + return handleOpenResponsesNonStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator, 0) } } @@ -764,163 +865,199 @@ func convertTextFormatToResponseFormat(textFormat interface{}) 
interface{} { } // handleBackgroundNonStream handles background non-streaming responses -func handleBackgroundNonStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool) (*schema.ORResponseResource, error) { - images := []string{} - videos := []string{} - audios := []string{} - for _, m := range openAIReq.Messages { - images = append(images, m.StringImages...) - videos = append(videos, m.StringVideos...) - audios = append(audios, m.StringAudios...) +func handleBackgroundNonStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) (*schema.ORResponseResource, error) { + mcpMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpMaxIterations = cfg.Agent.MaxIterations } + hasMCPTools := len(mcpToolInfos) > 0 + var allOutputItems []schema.ORItemField - toolsJSON := serializeToolsForBackend(input.Tools) - toolChoiceJSON := "" - if input.ToolChoice != nil { - toolChoiceBytes, err := json.Marshal(input.ToolChoice) - if err == nil { - toolChoiceJSON = string(toolChoiceBytes) + for mcpIteration := 0; mcpIteration <= mcpMaxIterations; mcpIteration++ { + if mcpIteration > 0 { + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + xlog.Debug("Background MCP re-templating", "iteration", mcpIteration) } - } - var logprobs *int - if input.TopLogprobs != nil && *input.TopLogprobs > 0 { - logprobs = input.TopLogprobs - } + 
images := []string{} + videos := []string{} + audios := []string{} + for _, m := range openAIReq.Messages { + images = append(images, m.StringImages...) + videos = append(videos, m.StringVideos...) + audios = append(audios, m.StringAudios...) + } - predFunc, err := backend.ModelInference( - ctx, predInput, openAIReq.Messages, images, videos, audios, ml, cfg, cl, appConfig, nil, toolsJSON, toolChoiceJSON, logprobs, input.TopLogprobs, input.LogitBias, nil) - if err != nil { - return nil, fmt.Errorf("model inference failed: %w", err) - } + toolsJSON := serializeToolsForBackend(input.Tools) + toolChoiceJSON := "" + if input.ToolChoice != nil { + toolChoiceBytes, err := json.Marshal(input.ToolChoice) + if err == nil { + toolChoiceJSON = string(toolChoiceBytes) + } + } - // Check for cancellation - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } + var logprobs *int + if input.TopLogprobs != nil && *input.TopLogprobs > 0 { + logprobs = input.TopLogprobs + } - const maxEmptyRetries = 5 - var prediction backend.LLMResponse - var result string - for attempt := 0; attempt <= maxEmptyRetries; attempt++ { - prediction, err = predFunc() + predFunc, err := backend.ModelInference( + ctx, predInput, openAIReq.Messages, images, videos, audios, ml, cfg, cl, appConfig, nil, toolsJSON, toolChoiceJSON, logprobs, input.TopLogprobs, input.LogitBias, nil) if err != nil { - return nil, fmt.Errorf("prediction failed: %w", err) - } - result = backend.Finetune(*cfg, predInput, prediction.Response) - if result != "" || !shouldUseFn { - break + return nil, fmt.Errorf("model inference failed: %w", err) } + select { case <-ctx.Done(): return nil, ctx.Err() default: } - xlog.Warn("Open Responses background: retrying prediction due to empty backend response", "attempt", attempt+1, "maxRetries", maxEmptyRetries) - } - // Parse tool calls if using functions (same logic as regular handler) - var outputItems []schema.ORItemField - var toolCalls []schema.ToolCall + const 
maxEmptyRetries = 5 + var prediction backend.LLMResponse + var result string + for attempt := 0; attempt <= maxEmptyRetries; attempt++ { + prediction, err = predFunc() + if err != nil { + return nil, fmt.Errorf("prediction failed: %w", err) + } + result = backend.Finetune(*cfg, predInput, prediction.Response) + if result != "" || !shouldUseFn { + break + } + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + xlog.Warn("Open Responses background: retrying prediction due to empty backend response", "attempt", attempt+1, "maxRetries", maxEmptyRetries) + } - if shouldUseFn { + // Parse tool calls var funcCallResults []functions.FuncCallResults var textContent string - // Try pre-parsed tool calls from C++ autoparser first - if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { - xlog.Debug("[ChatDeltas] OpenResponses: using pre-parsed tool calls", "count", len(deltaToolCalls)) - funcCallResults = deltaToolCalls - textContent = functions.ContentFromChatDeltas(prediction.ChatDeltas) - } else { - xlog.Debug("[ChatDeltas] OpenResponses: no pre-parsed tool calls, falling back to Go-side text parsing") - cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig) - funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig) - textContent = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig) - } + if shouldUseFn { + if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { + funcCallResults = deltaToolCalls + textContent = functions.ContentFromChatDeltas(prediction.ChatDeltas) + } else { + cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig) + funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig) + textContent = functions.ParseTextContent(cleanedResult, cfg.FunctionsConfig) + } - noActionName := "answer" - if cfg.FunctionsConfig.NoActionFunctionName != "" { - 
noActionName = cfg.FunctionsConfig.NoActionFunctionName - } + noActionName := "answer" + if cfg.FunctionsConfig.NoActionFunctionName != "" { + noActionName = cfg.FunctionsConfig.NoActionFunctionName + } - for i, fc := range funcCallResults { - if fc.Name == noActionName { - if fc.Arguments != "" { - var args map[string]interface{} - if err := json.Unmarshal([]byte(fc.Arguments), &args); err == nil { - if msg, ok := args["message"].(string); ok && msg != "" { - textContent = msg + var toolCalls []schema.ToolCall + for i, fc := range funcCallResults { + if fc.Name == noActionName { + if fc.Arguments != "" { + var args map[string]interface{} + if err := json.Unmarshal([]byte(fc.Arguments), &args); err == nil { + if msg, ok := args["message"].(string); ok && msg != "" { + textContent = msg + } } } + continue } - continue + toolCalls = append(toolCalls, schema.ToolCall{ + Index: i, + ID: fmt.Sprintf("fc_%s", uuid.New().String()), + Type: "function", + FunctionCall: schema.FunctionCall{ + Name: fc.Name, + Arguments: fc.Arguments, + }, + }) } - toolCalls = append(toolCalls, schema.ToolCall{ - Index: i, - ID: fmt.Sprintf("fc_%s", uuid.New().String()), - Type: "function", - FunctionCall: schema.FunctionCall{ - Name: fc.Name, - Arguments: fc.Arguments, - }, - }) - } - if textContent != "" { - outputItems = append(outputItems, schema.ORItemField{ - Type: "message", - ID: fmt.Sprintf("msg_%s", uuid.New().String()), - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, prediction.Logprobs)}, - }) - } + // MCP tool execution + if hasMCPTools && len(toolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range toolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls} + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) - for _, tc := range 
toolCalls { - outputItems = append(outputItems, schema.ORItemField{ - Type: "function_call", - ID: fmt.Sprintf("fc_%s", uuid.New().String()), - Status: "completed", - CallID: tc.ID, - Name: tc.FunctionCall.Name, - Arguments: tc.FunctionCall.Arguments, - }) - } + for _, tc := range toolCalls { + // Emit function_call + function_call_output items + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "function_call", ID: fmt.Sprintf("fc_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments, + }) - if len(outputItems) == 0 && result != "" { - outputItems = append(outputItems, schema.ORItemField{ - Type: "message", - ID: fmt.Sprintf("msg_%s", uuid.New().String()), - Status: "completed", - Role: "assistant", + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + toolResult, toolErr := mcpTools.ExecuteMCPToolCall(ctx, mcpToolInfos, tc.FunctionCall.Name, tc.FunctionCall.Arguments) + if toolErr != nil { + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name, + }) + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Output: toolResult, + }) + } + continue // next MCP iteration + } + } + + // No MCP calls, build output items + if textContent != "" { + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()), + Status: "completed", Role: "assistant", + Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(textContent, prediction.Logprobs)}, + }) + } + for _, tc := range toolCalls { + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "function_call", ID: 
fmt.Sprintf("fc_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments, + }) + } + if len(allOutputItems) == 0 && result != "" { + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()), + Status: "completed", Role: "assistant", + Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, prediction.Logprobs)}, + }) + } + } else { + allOutputItems = append(allOutputItems, schema.ORItemField{ + Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()), + Status: "completed", Role: "assistant", Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, prediction.Logprobs)}, }) } - } else { - outputItems = append(outputItems, schema.ORItemField{ - Type: "message", - ID: fmt.Sprintf("msg_%s", uuid.New().String()), - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(result, prediction.Logprobs)}, - }) - } - now := time.Now().Unix() - response := buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, outputItems, &schema.ORUsage{ - InputTokens: prediction.Usage.Prompt, - OutputTokens: prediction.Usage.Completion, - TotalTokens: prediction.Usage.Prompt + prediction.Usage.Completion, - }, true) + now := time.Now().Unix() + return buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, allOutputItems, &schema.ORUsage{ + InputTokens: prediction.Usage.Prompt, + OutputTokens: prediction.Usage.Completion, + TotalTokens: prediction.Usage.Prompt + prediction.Usage.Completion, + }, true), nil + } // end MCP iteration loop - return response, nil + return nil, fmt.Errorf("MCP iteration limit reached") } // handleBackgroundStream handles background streaming responses with event buffering -func handleBackgroundStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input 
*schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool) (*schema.ORResponseResource, error) { +func handleBackgroundStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) (*schema.ORResponseResource, error) { images := []string{} videos := []string{} audios := []string{} @@ -961,118 +1098,264 @@ func handleBackgroundStream(ctx context.Context, store *ResponseStore, responseI var accumulatedText string var collectedOutputItems []schema.ORItemField outputIndex := 0 - currentMessageID := fmt.Sprintf("msg_%s", uuid.New().String()) - // Emit output_item.added - messageItem := &schema.ORItemField{ - Type: "message", - ID: currentMessageID, - Status: "in_progress", - Role: "assistant", - Content: []schema.ORContentPart{}, + mcpBgStreamMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpBgStreamMaxIterations = cfg.Agent.MaxIterations } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: sequenceNumber, - OutputIndex: &outputIndex, - Item: messageItem, - }) - sequenceNumber++ + hasMCPTools := len(mcpToolInfos) > 0 - // Emit content_part.added - currentContentIndex := 0 - emptyPart := makeOutputTextPart("") - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.content_part.added", - SequenceNumber: sequenceNumber, - ItemID: currentMessageID, - OutputIndex: &outputIndex, - ContentIndex: ¤tContentIndex, - Part: &emptyPart, - }) - sequenceNumber++ + var prediction 
backend.LLMResponse - // Token callback for streaming - tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool { - select { - case <-ctx.Done(): - return false - default: + for mcpIter := 0; mcpIter <= mcpBgStreamMaxIterations; mcpIter++ { + if mcpIter > 0 { + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + xlog.Debug("Background stream MCP re-templating", "iteration", mcpIter) + images = images[:0] + videos = videos[:0] + audios = audios[:0] + for _, m := range openAIReq.Messages { + images = append(images, m.StringImages...) + videos = append(videos, m.StringVideos...) + audios = append(audios, m.StringAudios...) + } } - accumulatedText += token + accumulatedText = "" + currentMessageID := fmt.Sprintf("msg_%s", uuid.New().String()) - // Buffer text delta + // Emit output_item.added + messageItem := &schema.ORItemField{ + Type: "message", + ID: currentMessageID, + Status: "in_progress", + Role: "assistant", + Content: []schema.ORContentPart{}, + } bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_text.delta", + Type: "response.output_item.added", + SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, + Item: messageItem, + }) + sequenceNumber++ + + // Emit content_part.added + currentContentIndex := 0 + emptyPart := makeOutputTextPart("") + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.content_part.added", SequenceNumber: sequenceNumber, ItemID: currentMessageID, OutputIndex: &outputIndex, ContentIndex: ¤tContentIndex, - Delta: strPtr(token), - Logprobs: emptyLogprobs(), + Part: &emptyPart, }) sequenceNumber++ - return true - } - var streamLogprobs *int - if input.TopLogprobs != nil && *input.TopLogprobs > 0 { - streamLogprobs = input.TopLogprobs - } + // Token callback for streaming + tokenCallback := func(token string, tokenUsage backend.TokenUsage) bool { + select { + case <-ctx.Done(): + return false + default: + } - predFunc, err 
:= backend.ModelInference( - ctx, predInput, openAIReq.Messages, images, videos, audios, ml, cfg, cl, appConfig, tokenCallback, toolsJSON, toolChoiceJSON, streamLogprobs, input.TopLogprobs, input.LogitBias, nil) - if err != nil { - return nil, fmt.Errorf("model inference failed: %w", err) - } + accumulatedText += token - prediction, err := predFunc() - if err != nil { - return nil, fmt.Errorf("prediction failed: %w", err) - } + // Buffer text delta + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_text.delta", + SequenceNumber: sequenceNumber, + ItemID: currentMessageID, + OutputIndex: &outputIndex, + ContentIndex: ¤tContentIndex, + Delta: strPtr(token), + Logprobs: emptyLogprobs(), + }) + sequenceNumber++ + return true + } - // Emit output_text.done - streamEventLogprobs := convertLogprobsForStreaming(prediction.Logprobs) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_text.done", - SequenceNumber: sequenceNumber, - ItemID: currentMessageID, - OutputIndex: &outputIndex, - ContentIndex: ¤tContentIndex, - Text: strPtr(accumulatedText), - Logprobs: logprobsPtr(streamEventLogprobs), - }) - sequenceNumber++ + var streamLogprobs *int + if input.TopLogprobs != nil && *input.TopLogprobs > 0 { + streamLogprobs = input.TopLogprobs + } - // Emit content_part.done - textPart := makeOutputTextPartWithLogprobs(accumulatedText, prediction.Logprobs) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.content_part.done", - SequenceNumber: sequenceNumber, - ItemID: currentMessageID, - OutputIndex: &outputIndex, - ContentIndex: ¤tContentIndex, - Part: &textPart, - }) - sequenceNumber++ + predFunc, err := backend.ModelInference( + ctx, predInput, openAIReq.Messages, images, videos, audios, ml, cfg, cl, appConfig, tokenCallback, toolsJSON, toolChoiceJSON, streamLogprobs, input.TopLogprobs, input.LogitBias, nil) + if err != nil { + return nil, fmt.Errorf("model inference failed: %w", err) + } - // 
Emit output_item.done - completedMessageItem := &schema.ORItemField{ - Type: "message", - ID: currentMessageID, - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(accumulatedText, prediction.Logprobs)}, - } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: sequenceNumber, - OutputIndex: &outputIndex, - Item: completedMessageItem, - }) - sequenceNumber++ - collectedOutputItems = append(collectedOutputItems, *completedMessageItem) + prediction, err = predFunc() + if err != nil { + return nil, fmt.Errorf("prediction failed: %w", err) + } + + result := backend.Finetune(*cfg, predInput, prediction.Response) + + // Check for MCP tool calls in the streamed result + if shouldUseFn && hasMCPTools { + var funcCallResults []functions.FuncCallResults + if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { + funcCallResults = deltaToolCalls + } else { + cleanedResult := functions.CleanupLLMResult(result, cfg.FunctionsConfig) + funcCallResults = functions.ParseFunctionCall(cleanedResult, cfg.FunctionsConfig) + } + + noActionName := "answer" + if cfg.FunctionsConfig.NoActionFunctionName != "" { + noActionName = cfg.FunctionsConfig.NoActionFunctionName + } + + var toolCalls []schema.ToolCall + for i, fc := range funcCallResults { + if fc.Name == noActionName { + continue + } + toolCalls = append(toolCalls, schema.ToolCall{ + Index: i, ID: fmt.Sprintf("fc_%s", uuid.New().String()), + Type: "function", + FunctionCall: schema.FunctionCall{Name: fc.Name, Arguments: fc.Arguments}, + }) + } + + var hasMCPCalls bool + for _, tc := range toolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + hasMCPCalls = true + break + } + } + + if hasMCPCalls { + // Close the current message + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_text.done", SequenceNumber: sequenceNumber, + 
ItemID: currentMessageID, OutputIndex: &outputIndex, + ContentIndex: ¤tContentIndex, Text: strPtr(accumulatedText), + Logprobs: emptyLogprobs(), + }) + sequenceNumber++ + textPart := makeOutputTextPart(accumulatedText) + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.content_part.done", SequenceNumber: sequenceNumber, + ItemID: currentMessageID, OutputIndex: &outputIndex, + ContentIndex: ¤tContentIndex, Part: &textPart, + }) + sequenceNumber++ + completedMsg := &schema.ORItemField{ + Type: "message", ID: currentMessageID, Status: "completed", + Role: "assistant", Content: []schema.ORContentPart{textPart}, + } + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.done", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: completedMsg, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *completedMsg) + + // Append assistant message with tool calls + assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls} + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) + + // Execute MCP tools and emit events + for _, tc := range toolCalls { + outputIndex++ + functionCallItem := &schema.ORItemField{ + Type: "function_call", ID: tc.ID, Status: "completed", + CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments, + } + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.added", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: functionCallItem, + }) + sequenceNumber++ + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.done", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: functionCallItem, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *functionCallItem) + + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + + xlog.Debug("Executing MCP tool (background 
stream)", "tool", tc.FunctionCall.Name, "iteration", mcpIter) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall(ctx, mcpToolInfos, tc.FunctionCall.Name, tc.FunctionCall.Arguments) + if toolErr != nil { + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name, + }) + + outputIndex++ + outputItem := &schema.ORItemField{ + Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Output: toolResult, + } + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.added", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: outputItem, + }) + sequenceNumber++ + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.done", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: outputItem, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *outputItem) + } + continue // next MCP iteration + } + } + + // No MCP tools — close the message and break + streamEventLogprobs := convertLogprobsForStreaming(prediction.Logprobs) + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_text.done", + SequenceNumber: sequenceNumber, + ItemID: currentMessageID, + OutputIndex: &outputIndex, + ContentIndex: ¤tContentIndex, + Text: strPtr(accumulatedText), + Logprobs: logprobsPtr(streamEventLogprobs), + }) + sequenceNumber++ + + textPart := makeOutputTextPartWithLogprobs(accumulatedText, prediction.Logprobs) + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.content_part.done", + SequenceNumber: sequenceNumber, + ItemID: currentMessageID, + OutputIndex: &outputIndex, + ContentIndex: ¤tContentIndex, + Part: &textPart, + }) + sequenceNumber++ + + completedMessageItem := &schema.ORItemField{ + Type: "message", + 
ID: currentMessageID, + Status: "completed", + Role: "assistant", + Content: []schema.ORContentPart{makeOutputTextPartWithLogprobs(accumulatedText, prediction.Logprobs)}, + } + bufferEvent(store, responseID, &schema.ORStreamEvent{ + Type: "response.output_item.done", + SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, + Item: completedMessageItem, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *completedMessageItem) + + break + } // end MCP background stream iteration loop // Build final response now := time.Now().Unix() @@ -1092,373 +1375,6 @@ func handleBackgroundStream(ctx context.Context, store *ResponseStore, responseI return response, nil } -// handleBackgroundMCPResponse handles background MCP responses -func handleBackgroundMCPResponse(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, predInput string, openAIReq *schema.OpenAIRequest, appConfig *config.ApplicationConfig) (*schema.ORResponseResource, error) { - // Check for cancellation - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - // Validate MCP config - if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { - return nil, fmt.Errorf("no MCP servers configured") - } - - // Get MCP config from model config - remote, stdio, err := cfg.MCP.MCPConfigFromYAML() - if err != nil { - return nil, fmt.Errorf("failed to get MCP config: %w", err) - } - - // Get MCP sessions - sessions, err := mcpTools.SessionsFromMCPConfig(cfg.Name, remote, stdio) - if err != nil { - return nil, fmt.Errorf("failed to get MCP sessions: %w", err) - } - - if len(sessions) == 0 { - return nil, fmt.Errorf("no working MCP servers found") - } - - // Build fragment from messages - fragment := cogito.NewEmptyFragment() - for _, message := range openAIReq.Messages { - fragment = fragment.AddMessage(cogito.MessageRole(message.Role), message.StringContent) - } - 
fragmentPtr := &fragment - - // Get API address and key - _, port, err := net.SplitHostPort(appConfig.APIAddress) - if err != nil { - return nil, fmt.Errorf("failed to parse API address: %w", err) - } - apiKey := "" - if len(appConfig.ApiKeys) > 0 { - apiKey = appConfig.ApiKeys[0] - } - - // Create OpenAI LLM client - defaultLLM := clients.NewLocalAILLM(cfg.Name, apiKey, "http://127.0.0.1:"+port) - - // Build cogito options - cogitoOpts := cfg.BuildCogitoOptions() - cogitoOpts = append( - cogitoOpts, - cogito.WithContext(ctx), - cogito.WithMCPs(sessions...), - ) - - if input.Stream { - return handleBackgroundMCPStream(ctx, store, responseID, createdAt, input, cfg, defaultLLM, fragmentPtr, cogitoOpts) - } - - // Non-streaming mode - return handleBackgroundMCPNonStream(ctx, store, responseID, createdAt, input, cfg, defaultLLM, fragmentPtr, cogitoOpts) -} - -// handleBackgroundMCPNonStream handles background non-streaming MCP responses -func handleBackgroundMCPNonStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, defaultLLM cogito.LLM, fragment *cogito.Fragment, cogitoOpts []cogito.Option) (*schema.ORResponseResource, error) { - frag := *fragment - - // Check for cancellation - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - // Set up callbacks for logging - cogitoOpts = append( - cogitoOpts, - cogito.WithStatusCallback(func(s string) { - xlog.Debug("[Open Responses MCP Background] Status", "model", cfg.Name, "status", s, "response_id", responseID) - }), - cogito.WithReasoningCallback(func(s string) { - xlog.Debug("[Open Responses MCP Background] Reasoning", "model", cfg.Name, "reasoning", s, "response_id", responseID) - }), - cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - xlog.Debug("[Open Responses MCP Background] Tool call", "model", cfg.Name, "tool", t.Name, "reasoning", t.Reasoning, 
"arguments", t.Arguments, "response_id", responseID) - return cogito.ToolCallDecision{ - Approved: true, - } - }), - cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) { - xlog.Debug("[Open Responses MCP Background] Tool call result", "model", cfg.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments, "response_id", responseID) - }), - ) - - // Execute tools - f, err := cogito.ExecuteTools(defaultLLM, frag, cogitoOpts...) - if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - return nil, fmt.Errorf("failed to execute tools: %w", err) - } - - // Check for cancellation - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - // Convert fragment to Open Responses format - fPtr := &f - outputItems := convertCogitoFragmentToORItems(fPtr) - - // Build response with all required fields - now := time.Now().Unix() - response := buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, outputItems, nil, true) - - return response, nil -} - -// handleBackgroundMCPStream handles background streaming MCP responses -func handleBackgroundMCPStream(ctx context.Context, store *ResponseStore, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, defaultLLM cogito.LLM, fragment *cogito.Fragment, cogitoOpts []cogito.Option) (*schema.ORResponseResource, error) { - frag := *fragment - sequenceNumber := 0 - - // Emit response.created - responseCreated := buildORResponse(responseID, createdAt, nil, schema.ORStatusInProgress, input, []schema.ORItemField{}, nil, true) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.created", - SequenceNumber: sequenceNumber, - Response: responseCreated, - }) - sequenceNumber++ - - // Emit response.in_progress - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.in_progress", - SequenceNumber: sequenceNumber, - Response: responseCreated, - }) - sequenceNumber++ - - // Create channels for 
streaming events - events := make(chan interface{}) - ended := make(chan error, 1) - var collectedOutputItems []schema.ORItemField - outputIndex := 0 - - // Set up callbacks - statusCallback := func(s string) { - select { - case <-ctx.Done(): - return - case events <- map[string]interface{}{ - "type": "status", - "message": s, - }: - } - } - - reasoningCallback := func(s string) { - select { - case <-ctx.Done(): - return - default: - } - itemID := fmt.Sprintf("reasoning_%s", uuid.New().String()) - outputIndex++ - item := &schema.ORItemField{ - Type: "reasoning", - ID: itemID, - Status: "in_progress", - } - collectedOutputItems = append(collectedOutputItems, *item) - - select { - case <-ctx.Done(): - return - case events <- map[string]interface{}{ - "type": "reasoning", - "item_id": itemID, - "output_index": outputIndex, - "content": s, - }: - } - } - - toolCallCallback := func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - select { - case <-ctx.Done(): - return cogito.ToolCallDecision{Approved: false} - default: - } - toolCallID := fmt.Sprintf("fc_%s", uuid.New().String()) - outputIndex++ - item := &schema.ORItemField{ - Type: "function_call", - ID: toolCallID, - Status: "in_progress", - CallID: toolCallID, - Name: t.Name, - Arguments: "", - } - collectedOutputItems = append(collectedOutputItems, *item) - - select { - case <-ctx.Done(): - return cogito.ToolCallDecision{Approved: false} - case events <- map[string]interface{}{ - "type": "tool_call", - "item_id": toolCallID, - "output_index": outputIndex, - "name": t.Name, - "arguments": t.Arguments, - "reasoning": t.Reasoning, - }: - } - return cogito.ToolCallDecision{ - Approved: true, - } - } - - toolCallResultCallback := func(t cogito.ToolStatus) { - select { - case <-ctx.Done(): - return - default: - } - outputIndex++ - callID := fmt.Sprintf("fc_%s", uuid.New().String()) - item := schema.ORItemField{ - Type: "function_call_output", - ID: fmt.Sprintf("fco_%s", uuid.New().String()), 
- Status: "completed", - CallID: callID, - Output: t.Result, - } - collectedOutputItems = append(collectedOutputItems, item) - - select { - case <-ctx.Done(): - return - case events <- map[string]interface{}{ - "type": "tool_result", - "item_id": item.ID, - "output_index": outputIndex, - "name": t.Name, - "result": t.Result, - }: - } - } - - cogitoOpts = append(cogitoOpts, - cogito.WithStatusCallback(statusCallback), - cogito.WithReasoningCallback(reasoningCallback), - cogito.WithToolCallBack(toolCallCallback), - cogito.WithToolCallResultCallback(toolCallResultCallback), - ) - - // Execute tools in goroutine - go func() { - defer close(events) - - f, err := cogito.ExecuteTools(defaultLLM, frag, cogitoOpts...) - if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - select { - case <-ctx.Done(): - ended <- ctx.Err() - case events <- map[string]interface{}{ - "type": "error", - "message": fmt.Sprintf("Failed to execute tools: %v", err), - }: - ended <- err - } - return - } - - // Check for cancellation - select { - case <-ctx.Done(): - ended <- ctx.Err() - return - default: - } - - // Stream final assistant message - content := f.LastMessage().Content - messageID := fmt.Sprintf("msg_%s", uuid.New().String()) - outputIndex++ - item := schema.ORItemField{ - Type: "message", - ID: messageID, - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPart(content)}, - } - collectedOutputItems = append(collectedOutputItems, item) - - select { - case <-ctx.Done(): - ended <- ctx.Err() - case events <- map[string]interface{}{ - "type": "assistant", - "item_id": messageID, - "output_index": outputIndex, - "content": content, - }: - ended <- nil - } - }() - - // Process events from channel -LOOP: - for { - select { - case <-ctx.Done(): - break LOOP - case event := <-events: - if event == nil { - break LOOP - } - // Convert event to Open Responses format and buffer - bufferMCPEventAsOR(store, responseID, event, &sequenceNumber) - case 
err := <-ended: - if err != nil { - // Buffer error event - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "error", - SequenceNumber: sequenceNumber, - Error: &schema.ORErrorPayload{ - Type: "model_error", - Message: err.Error(), - }, - }) - sequenceNumber++ - - // Buffer failed response - responseFailed := buildORResponse(responseID, createdAt, nil, schema.ORStatusFailed, input, collectedOutputItems, nil, true) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.failed", - SequenceNumber: sequenceNumber, - Response: responseFailed, - }) - return nil, err - } - - // Emit response.completed - now := time.Now().Unix() - responseCompleted := buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, collectedOutputItems, nil, true) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.completed", - SequenceNumber: sequenceNumber, - Response: responseCompleted, - }) - - break LOOP - } - } - - // Build final response - now := time.Now().Unix() - response := buildORResponse(responseID, createdAt, &now, schema.ORStatusCompleted, input, collectedOutputItems, nil, true) - - return response, nil -} - // bufferEvent stores an SSE event in the response store for streaming resume func bufferEvent(store *ResponseStore, responseID string, event *schema.ORStreamEvent) { if err := store.AppendEvent(responseID, event); err != nil { @@ -1467,7 +1383,14 @@ func bufferEvent(store *ResponseStore, responseID string, event *schema.ORStream } // handleOpenResponsesNonStream handles non-streaming responses -func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool) error { +func handleOpenResponsesNonStream(c echo.Context, 
responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator, mcpIteration int) error { + mcpMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpMaxIterations = cfg.Agent.MaxIterations + } + if mcpIteration > mcpMaxIterations { + return sendOpenResponsesError(c, 500, "server_error", "MCP iteration limit reached", "") + } images := []string{} videos := []string{} audios := []string{} @@ -1595,6 +1518,55 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i }) } + // MCP server-side tool execution: if any tool calls are MCP tools, execute and re-run + if len(mcpToolInfos) > 0 && len(toolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range toolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + // Append assistant message with tool_calls to conversation + assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: toolCalls} + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) + + // Execute each MCP tool call and append results + for _, tc := range toolCalls { + if !mcpTools.IsMCPTool(mcpToolInfos, tc.FunctionCall.Name) { + continue + } + xlog.Debug("Executing MCP tool (Open Responses)", "tool", tc.FunctionCall.Name) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + c.Request().Context(), mcpToolInfos, + tc.FunctionCall.Name, tc.FunctionCall.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.FunctionCall.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", Content: 
toolResult, StringContent: toolResult, ToolCallID: tc.ID, Name: tc.FunctionCall.Name, + }) + + // Collect function_call + function_call_output items for the response + outputItems = append(outputItems, schema.ORItemField{ + Type: "function_call", ID: fmt.Sprintf("fc_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Name: tc.FunctionCall.Name, Arguments: tc.FunctionCall.Arguments, + }) + outputItems = append(outputItems, schema.ORItemField{ + Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()), + Status: "completed", CallID: tc.ID, Output: toolResult, + }) + } + + // Re-template and re-run inference + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + return handleOpenResponsesNonStream(c, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, shouldStore, mcpToolInfos, evaluator, mcpIteration+1) + } + } + // Add message item with text content (include logprobs if available) if textContent != "" { outputItems = append(outputItems, schema.ORItemField{ @@ -1619,8 +1591,6 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i } // If we have no output items but the model did produce output, include the cleaned result as a message - // This handles cases where the function call parsing failed but we still have model output - // Note: reasoning item may already be added above hasMessageItem := false for _, item := range outputItems { if item.Type == "message" { @@ -1640,7 +1610,6 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i } } else { // Simple text response (include logprobs if available) - // Note: reasoning item may already be added above messageItem := schema.ORItemField{ Type: "message", ID: fmt.Sprintf("msg_%s", uuid.New().String()), @@ -1682,7 +1651,7 @@ func handleOpenResponsesNonStream(c echo.Context, responseID string, createdAt i } // handleOpenResponsesStream handles 
streaming responses -func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool) error { +func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, predInput string, openAIReq *schema.OpenAIRequest, funcs functions.Functions, shouldUseFn bool, shouldStore bool, mcpToolInfos []mcpTools.MCPToolInfo, evaluator *templates.Evaluator) error { c.Response().Header().Set("Content-Type", "text/event-stream") c.Response().Header().Set("Cache-Control", "no-cache") c.Response().Header().Set("Connection", "keep-alive") @@ -1754,6 +1723,32 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 var collectedOutputItems []schema.ORItemField if shouldUseFn { + mcpStreamMaxIterations := 10 + if cfg.Agent.MaxIterations > 0 { + mcpStreamMaxIterations = cfg.Agent.MaxIterations + } + hasMCPToolsStream := len(mcpToolInfos) > 0 + + var prediction backend.LLMResponse + var result, finalReasoning, finalCleanedResult string + var textContent string + var parsedToolCalls []functions.FuncCallResults + var toolCalls []functions.FuncCallResults + + for mcpStreamIter := 0; mcpStreamIter <= mcpStreamMaxIterations; mcpStreamIter++ { + if mcpStreamIter > 0 { + predInput = evaluator.TemplateMessages(*openAIReq, openAIReq.Messages, cfg, funcs, shouldUseFn) + xlog.Debug("Open Responses stream MCP re-templating", "iteration", mcpStreamIter) + images = images[:0] + videos = videos[:0] + audios = audios[:0] + for _, m := range openAIReq.Messages { + images = append(images, m.StringImages...) 
+ videos = append(videos, m.StringVideos...) + audios = append(audios, m.StringAudios...) + } + } + // For tool calls, we need to track accumulated result and parse incrementally // We'll handle this differently - track the full result and parse tool calls accumulatedResult := "" @@ -2067,7 +2062,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 return nil } - prediction, err := predFunc() + prediction, err = predFunc() if err != nil { xlog.Error("Open Responses stream prediction failed", "error", err) sendSSEEvent(c, &schema.ORStreamEvent{ @@ -2092,10 +2087,10 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 return nil } - result := backend.Finetune(*cfg, predInput, prediction.Response) + result = backend.Finetune(*cfg, predInput, prediction.Response) // Extract reasoning from final result - finalReasoning, finalCleanedResult := reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig) + finalReasoning, finalCleanedResult = reason.ExtractReasoningWithConfig(result, thinkingStartToken, cfg.ReasoningConfig) // Close reasoning item if it exists and wasn't closed yet if currentReasoningID != "" && finalReasoning != "" { @@ -2148,8 +2143,8 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 } } - var parsedToolCalls []functions.FuncCallResults - var textContent string + parsedToolCalls = nil + textContent = "" // Try pre-parsed tool calls from C++ autoparser first if deltaToolCalls := functions.ToolCallsFromChatDeltas(prediction.ChatDeltas); len(deltaToolCalls) > 0 { @@ -2170,7 +2165,7 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 } // Filter out noAction calls and extract the message - var toolCalls []functions.FuncCallResults + toolCalls = nil for _, fc := range parsedToolCalls { if fc.Name == noActionName { // This is a text response, not a tool call @@ -2189,6 +2184,91 @@ func 
handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 xlog.Debug("Open Responses Stream - Parsed", "toolCalls", len(toolCalls), "textContent", textContent) + // MCP streaming tool execution: check if any tool calls are MCP tools + if hasMCPToolsStream && len(toolCalls) > 0 { + var hasMCPCalls bool + for _, tc := range toolCalls { + if mcpTools.IsMCPTool(mcpToolInfos, tc.Name) { + hasMCPCalls = true + break + } + } + if hasMCPCalls { + // Build schema.ToolCall list for the assistant message + var schemaToolCalls []schema.ToolCall + for i, tc := range toolCalls { + schemaToolCalls = append(schemaToolCalls, schema.ToolCall{ + Index: i, ID: fmt.Sprintf("fc_%s", uuid.New().String()), + Type: "function", + FunctionCall: schema.FunctionCall{Name: tc.Name, Arguments: tc.Arguments}, + }) + } + assistantMsg := schema.Message{Role: "assistant", Content: result, ToolCalls: schemaToolCalls} + openAIReq.Messages = append(openAIReq.Messages, assistantMsg) + + for idx, tc := range toolCalls { + tcID := schemaToolCalls[idx].ID + + // Emit function_call item + outputIndex++ + functionCallItem := &schema.ORItemField{ + Type: "function_call", ID: tcID, Status: "completed", + CallID: tcID, Name: tc.Name, Arguments: tc.Arguments, + } + sendSSEEvent(c, &schema.ORStreamEvent{ + Type: "response.output_item.added", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: functionCallItem, + }) + sequenceNumber++ + sendSSEEvent(c, &schema.ORStreamEvent{ + Type: "response.output_item.done", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: functionCallItem, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *functionCallItem) + + if !mcpTools.IsMCPTool(mcpToolInfos, tc.Name) { + continue + } + + // Execute MCP tool + xlog.Debug("Executing MCP tool (Open Responses stream)", "tool", tc.Name, "iteration", mcpStreamIter) + toolResult, toolErr := mcpTools.ExecuteMCPToolCall( + input.Context, mcpToolInfos, tc.Name, 
tc.Arguments, + ) + if toolErr != nil { + xlog.Error("MCP tool execution failed", "tool", tc.Name, "error", toolErr) + toolResult = fmt.Sprintf("Error: %v", toolErr) + } + openAIReq.Messages = append(openAIReq.Messages, schema.Message{ + Role: "tool", Content: toolResult, StringContent: toolResult, ToolCallID: tcID, Name: tc.Name, + }) + + // Emit function_call_output item + outputIndex++ + outputItem := &schema.ORItemField{ + Type: "function_call_output", ID: fmt.Sprintf("fco_%s", uuid.New().String()), + Status: "completed", CallID: tcID, Output: toolResult, + } + sendSSEEvent(c, &schema.ORStreamEvent{ + Type: "response.output_item.added", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: outputItem, + }) + sequenceNumber++ + sendSSEEvent(c, &schema.ORStreamEvent{ + Type: "response.output_item.done", SequenceNumber: sequenceNumber, + OutputIndex: &outputIndex, Item: outputItem, + }) + sequenceNumber++ + collectedOutputItems = append(collectedOutputItems, *outputItem) + } + c.Response().Flush() + xlog.Debug("MCP streaming tools executed, re-running inference", "iteration", mcpStreamIter) + continue // next MCP stream iteration + } + } + + // Convert prediction logprobs for streaming events streamEventLogprobs := convertLogprobsForStreaming(prediction.Logprobs) @@ -2278,6 +2358,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 collectedOutputItems = append(collectedOutputItems, *functionCallItem) } + break // no MCP tools to execute, exit loop + } // end MCP stream iteration loop + // Build final response with all items (include reasoning first, then messages, then tool calls) var allOutputItems []schema.ORItemField // Add reasoning item if it exists @@ -2677,73 +2760,6 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6 return nil } -// handleMCPResponse handles responses using MCP agentic loop -func handleMCPResponse(c echo.Context, responseID string, createdAt int64, input 
*schema.OpenResponsesRequest, cfg *config.ModelConfig, ml *model.ModelLoader, predInput string, openAIReq *schema.OpenAIRequest, appConfig *config.ApplicationConfig, shouldStore bool) error { - ctx := input.Context - if ctx == nil { - ctx = c.Request().Context() - } - - // Validate MCP config - if cfg.MCP.Servers == "" && cfg.MCP.Stdio == "" { - return sendOpenResponsesError(c, 400, "invalid_request", "no MCP servers configured", "") - } - - // Get MCP config from model config - remote, stdio, err := cfg.MCP.MCPConfigFromYAML() - if err != nil { - return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get MCP config: %v", err), "") - } - - // Get MCP sessions - sessions, err := mcpTools.SessionsFromMCPConfig(cfg.Name, remote, stdio) - if err != nil { - return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to get MCP sessions: %v", err), "") - } - - if len(sessions) == 0 { - return sendOpenResponsesError(c, 500, "server_error", "no working MCP servers found", "") - } - - // Build fragment from messages - fragment := cogito.NewEmptyFragment() - for _, message := range openAIReq.Messages { - fragment = fragment.AddMessage(cogito.MessageRole(message.Role), message.StringContent) - } - fragmentPtr := &fragment - - // Get API address and key - _, port, err := net.SplitHostPort(appConfig.APIAddress) - if err != nil { - return sendOpenResponsesError(c, 500, "server_error", fmt.Sprintf("failed to parse API address: %v", err), "") - } - apiKey := "" - if len(appConfig.ApiKeys) > 0 { - apiKey = appConfig.ApiKeys[0] - } - - ctxWithCancellation, cancel := context.WithCancel(ctx) - defer cancel() - - // Create OpenAI LLM client - defaultLLM := clients.NewLocalAILLM(cfg.Name, apiKey, "http://127.0.0.1:"+port) - - // Build cogito options - cogitoOpts := cfg.BuildCogitoOptions() - cogitoOpts = append( - cogitoOpts, - cogito.WithContext(ctxWithCancellation), - cogito.WithMCPs(sessions...), - ) - - if input.Stream { - return 
handleMCPStream(c, responseID, createdAt, input, cfg, defaultLLM, fragmentPtr, cogitoOpts, ctxWithCancellation, cancel, shouldStore) - } - - // Non-streaming mode - return handleMCPNonStream(c, responseID, createdAt, input, cfg, defaultLLM, fragmentPtr, cogitoOpts, ctxWithCancellation, shouldStore) -} - // sendSSEEvent sends a Server-Sent Event func sendSSEEvent(c echo.Context, event *schema.ORStreamEvent) { data, err := json.Marshal(event) @@ -2754,670 +2770,6 @@ func sendSSEEvent(c echo.Context, event *schema.ORStreamEvent) { fmt.Fprintf(c.Response().Writer, "event: %s\ndata: %s\n\n", event.Type, string(data)) } -// handleMCPNonStream handles non-streaming MCP responses -func handleMCPNonStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, defaultLLM cogito.LLM, fragment *cogito.Fragment, cogitoOpts []cogito.Option, ctx context.Context, shouldStore bool) error { - frag := *fragment - // Set up callbacks for logging - cogitoOpts = append( - cogitoOpts, - cogito.WithStatusCallback(func(s string) { - xlog.Debug("[Open Responses MCP] Status", "model", cfg.Name, "status", s) - }), - cogito.WithReasoningCallback(func(s string) { - xlog.Debug("[Open Responses MCP] Reasoning", "model", cfg.Name, "reasoning", s) - }), - cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - xlog.Debug("[Open Responses MCP] Tool call", "model", cfg.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments) - return cogito.ToolCallDecision{ - Approved: true, - } - }), - cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) { - xlog.Debug("[Open Responses MCP] Tool call result", "model", cfg.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments) - }), - ) - - // Execute tools - f, err := cogito.ExecuteTools(defaultLLM, frag, cogitoOpts...) 
- if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - return sendOpenResponsesError(c, 500, "model_error", fmt.Sprintf("failed to execute tools: %v", err), "") - } - - // Convert fragment to Open Responses format - fPtr := &f - outputItems := convertCogitoFragmentToORItems(fPtr) - - // Build response with all required fields - now := time.Now().Unix() - response := buildORResponse(responseID, createdAt, &now, "completed", input, outputItems, nil, shouldStore) - - // Store response (if enabled) - if shouldStore { - store := GetGlobalStore() - store.Store(responseID, input, response) - } - - return c.JSON(200, response) -} - -// handleMCPStream handles streaming MCP responses -func handleMCPStream(c echo.Context, responseID string, createdAt int64, input *schema.OpenResponsesRequest, cfg *config.ModelConfig, defaultLLM cogito.LLM, fragment *cogito.Fragment, cogitoOpts []cogito.Option, ctx context.Context, cancel context.CancelFunc, shouldStore bool) error { - frag := *fragment - // Set SSE headers - c.Response().Header().Set("Content-Type", "text/event-stream") - c.Response().Header().Set("Cache-Control", "no-cache") - c.Response().Header().Set("Connection", "keep-alive") - - sequenceNumber := 0 - - // Emit response.created - use helper to create response with all required fields - responseCreated := buildORResponse(responseID, createdAt, nil, "in_progress", input, []schema.ORItemField{}, nil, shouldStore) - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.created", - SequenceNumber: sequenceNumber, - Response: responseCreated, - }) - sequenceNumber++ - - // Emit response.in_progress - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.in_progress", - SequenceNumber: sequenceNumber, - Response: responseCreated, - }) - sequenceNumber++ - - // Create channels for streaming events - events := make(chan interface{}) - ended := make(chan error, 1) - var collectedOutputItems []schema.ORItemField - outputIndex := 0 - - // Set up callbacks - 
statusCallback := func(s string) { - events <- map[string]interface{}{ - "type": "status", - "message": s, - } - } - - reasoningCallback := func(s string) { - itemID := fmt.Sprintf("reasoning_%s", uuid.New().String()) - outputIndex++ - item := &schema.ORItemField{ - Type: "reasoning", - ID: itemID, - Status: "in_progress", - } - collectedOutputItems = append(collectedOutputItems, *item) - - events <- map[string]interface{}{ - "type": "reasoning", - "item_id": itemID, - "output_index": outputIndex, - "content": s, - } - } - - toolCallCallback := func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision { - toolCallID := fmt.Sprintf("fc_%s", uuid.New().String()) - outputIndex++ - item := &schema.ORItemField{ - Type: "function_call", - ID: toolCallID, - Status: "in_progress", - CallID: toolCallID, - Name: t.Name, - Arguments: "", - } - collectedOutputItems = append(collectedOutputItems, *item) - - events <- map[string]interface{}{ - "type": "tool_call", - "item_id": toolCallID, - "output_index": outputIndex, - "name": t.Name, - "arguments": t.Arguments, - "reasoning": t.Reasoning, - } - return cogito.ToolCallDecision{ - Approved: true, - } - } - - toolCallResultCallback := func(t cogito.ToolStatus) { - outputIndex++ - callID := fmt.Sprintf("fc_%s", uuid.New().String()) - item := schema.ORItemField{ - Type: "function_call_output", - ID: fmt.Sprintf("fco_%s", uuid.New().String()), - Status: "completed", - CallID: callID, - Output: t.Result, - } - collectedOutputItems = append(collectedOutputItems, item) - - events <- map[string]interface{}{ - "type": "tool_result", - "item_id": item.ID, - "output_index": outputIndex, - "name": t.Name, - "result": t.Result, - } - } - - cogitoOpts = append(cogitoOpts, - cogito.WithStatusCallback(statusCallback), - cogito.WithReasoningCallback(reasoningCallback), - cogito.WithToolCallBack(toolCallCallback), - cogito.WithToolCallResultCallback(toolCallResultCallback), - ) - - // Execute tools in goroutine - go func() { 
- defer close(events) - - f, err := cogito.ExecuteTools(defaultLLM, frag, cogitoOpts...) - if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) { - events <- map[string]interface{}{ - "type": "error", - "message": fmt.Sprintf("Failed to execute tools: %v", err), - } - ended <- err - return - } - - // Stream final assistant message - content := f.LastMessage().Content - messageID := fmt.Sprintf("msg_%s", uuid.New().String()) - outputIndex++ - item := schema.ORItemField{ - Type: "message", - ID: messageID, - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPart(content)}, - } - collectedOutputItems = append(collectedOutputItems, item) - - events <- map[string]interface{}{ - "type": "assistant", - "item_id": messageID, - "output_index": outputIndex, - "content": content, - } - - ended <- nil - }() - - // Stream events to client -LOOP: - for { - select { - case <-ctx.Done(): - cancel() - break LOOP - case event := <-events: - if event == nil { - break LOOP - } - // Convert event to Open Responses format and send - if err := sendMCPEventAsOR(c, event, &sequenceNumber); err != nil { - cancel() - return err - } - c.Response().Flush() - case err := <-ended: - if err == nil { - // Emit response.completed - now := time.Now().Unix() - responseCompleted := buildORResponse(responseID, createdAt, &now, "completed", input, collectedOutputItems, nil, shouldStore) - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.completed", - SequenceNumber: sequenceNumber, - Response: responseCompleted, - }) - sequenceNumber++ - - // Store response (if enabled) - if shouldStore { - store := GetGlobalStore() - store.Store(responseID, input, responseCompleted) - } - - // Send [DONE] - fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n") - c.Response().Flush() - break LOOP - } - // Send error - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "error", - SequenceNumber: sequenceNumber, - Error: &schema.ORErrorPayload{ - Type: "model_error", - 
Message: err.Error(), - }, - }) - sequenceNumber++ - responseFailed := buildORResponse(responseID, createdAt, nil, "failed", input, collectedOutputItems, nil, shouldStore) - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.failed", - SequenceNumber: sequenceNumber, - Response: responseFailed, - }) - fmt.Fprintf(c.Response().Writer, "data: [DONE]\n\n") - c.Response().Flush() - return nil - } - } - - return nil -} - -// convertCogitoFragmentToORItems converts a cogito fragment to Open Responses items -func convertCogitoFragmentToORItems(f *cogito.Fragment) []schema.ORItemField { - var items []schema.ORItemField - - // Get the last message (assistant response) - lastMsg := f.LastMessage() - if lastMsg != nil && lastMsg.Content != "" { - items = append(items, schema.ORItemField{ - Type: "message", - ID: fmt.Sprintf("msg_%s", uuid.New().String()), - Status: "completed", - Role: "assistant", - Content: []schema.ORContentPart{makeOutputTextPart(lastMsg.Content)}, - }) - } - - return items -} - -// sendMCPEventAsOR converts MCP events to Open Responses format and sends them -func sendMCPEventAsOR(c echo.Context, event interface{}, sequenceNumber *int) error { - eventMap, ok := event.(map[string]interface{}) - if !ok { - return nil - } - - eventType, _ := eventMap["type"].(string) - switch eventType { - case "status": - // Status events are informational, skip for now - return nil - case "reasoning": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - - item := &schema.ORItemField{ - Type: "reasoning", - ID: itemID, - Status: "in_progress", - } - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - // Note: reasoning content streaming would go here - return nil - case "tool_call": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - name, _ := 
eventMap["name"].(string) - arguments, _ := eventMap["arguments"].(string) - - item := &schema.ORItemField{ - Type: "function_call", - ID: itemID, - Status: "in_progress", - CallID: itemID, - Name: name, - Arguments: "", - } - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - - // Emit arguments - if arguments != "" { - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.function_call_arguments.delta", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - Delta: strPtr(arguments), - }) - *sequenceNumber++ - - item.Status = "completed" - item.Arguments = arguments - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.function_call_arguments.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - Arguments: strPtr(arguments), - }) - *sequenceNumber++ - - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - } - return nil - case "tool_result": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - result, _ := eventMap["result"].(string) - - item := &schema.ORItemField{ - Type: "function_call_output", - ID: itemID, - Status: "completed", - Output: result, - } - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - return nil - case "assistant": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - content, _ := eventMap["content"].(string) - - item := 
&schema.ORItemField{ - Type: "message", - ID: itemID, - Status: "in_progress", - Role: "assistant", - Content: []schema.ORContentPart{}, - } - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - - // Emit content part - emptyPart := makeOutputTextPart("") - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.content_part.added", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Part: &emptyPart, - }) - *sequenceNumber++ - - // Emit text done - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_text.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Text: strPtr(content), - Logprobs: emptyLogprobs(), - }) - *sequenceNumber++ - - // Emit content part done - contentPart := makeOutputTextPart(content) - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.content_part.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Part: &contentPart, - }) - *sequenceNumber++ - - // Emit item done - item.Status = "completed" - item.Content = []schema.ORContentPart{makeOutputTextPart(content)} - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - return nil - case "error": - message, _ := eventMap["message"].(string) - sendSSEEvent(c, &schema.ORStreamEvent{ - Type: "error", - SequenceNumber: *sequenceNumber, - Error: &schema.ORErrorPayload{ - Type: "model_error", - Message: message, - }, - }) - *sequenceNumber++ - return nil - } - - return nil -} - -// bufferMCPEventAsOR converts MCP events to Open Responses format and buffers them -func 
bufferMCPEventAsOR(store *ResponseStore, responseID string, event interface{}, sequenceNumber *int) { - eventMap, ok := event.(map[string]interface{}) - if !ok { - return - } - - eventType, _ := eventMap["type"].(string) - switch eventType { - case "status": - // Status events are informational, skip for now - return - case "reasoning": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - - item := &schema.ORItemField{ - Type: "reasoning", - ID: itemID, - Status: "in_progress", - } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - // Note: reasoning content streaming would go here - return - case "tool_call": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - name, _ := eventMap["name"].(string) - arguments, _ := eventMap["arguments"].(string) - - item := &schema.ORItemField{ - Type: "function_call", - ID: itemID, - Status: "in_progress", - CallID: itemID, - Name: name, - Arguments: "", - } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - - // Emit arguments - if arguments != "" { - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.function_call_arguments.delta", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - Delta: strPtr(arguments), - }) - *sequenceNumber++ - - item.Status = "completed" - item.Arguments = arguments - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.function_call_arguments.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - Arguments: strPtr(arguments), - }) - *sequenceNumber++ - - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: 
"response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - } - return - case "tool_result": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - result, _ := eventMap["result"].(string) - - item := &schema.ORItemField{ - Type: "function_call_output", - ID: itemID, - Status: "completed", - Output: result, - } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - return - case "assistant": - itemID, _ := eventMap["item_id"].(string) - outputIndex, _ := eventMap["output_index"].(int) - content, _ := eventMap["content"].(string) - - item := &schema.ORItemField{ - Type: "message", - ID: itemID, - Status: "in_progress", - Role: "assistant", - Content: []schema.ORContentPart{}, - } - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.added", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - - // Emit content part - emptyPart := makeOutputTextPart("") - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.content_part.added", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Part: &emptyPart, - }) - *sequenceNumber++ - - // Emit text done - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_text.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Text: strPtr(content), - Logprobs: emptyLogprobs(), - }) - 
*sequenceNumber++ - - // Emit content part done - contentPart := makeOutputTextPart(content) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.content_part.done", - SequenceNumber: *sequenceNumber, - ItemID: itemID, - OutputIndex: &outputIndex, - ContentIndex: func() *int { i := 0; return &i }(), - Part: &contentPart, - }) - *sequenceNumber++ - - // Emit item done - item.Status = "completed" - item.Content = []schema.ORContentPart{makeOutputTextPart(content)} - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "response.output_item.done", - SequenceNumber: *sequenceNumber, - OutputIndex: &outputIndex, - Item: item, - }) - *sequenceNumber++ - return - case "error": - message, _ := eventMap["message"].(string) - bufferEvent(store, responseID, &schema.ORStreamEvent{ - Type: "error", - SequenceNumber: *sequenceNumber, - Error: &schema.ORErrorPayload{ - Type: "model_error", - Message: message, - }, - }) - *sequenceNumber++ - return - } -} - // getTopLogprobs returns the top_logprobs value, defaulting to 0 if nil func getTopLogprobs(topLogprobs *int) int { if topLogprobs != nil { diff --git a/core/http/endpoints/openresponses/websocket.go b/core/http/endpoints/openresponses/websocket.go index 8c03349cc..9e6ce7109 100644 --- a/core/http/endpoints/openresponses/websocket.go +++ b/core/http/endpoints/openresponses/websocket.go @@ -308,7 +308,7 @@ func handleWSResponseCreate(connCtx context.Context, conn *lockedConn, input *sc defer close(processDone) store.UpdateStatus(responseID, schema.ORStatusInProgress, nil) - finalResponse, bgErr := handleBackgroundStream(reqCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn) + finalResponse, bgErr := handleBackgroundStream(reqCtx, store, responseID, createdAt, input, cfg, ml, cl, appConfig, predInput, openAIReq, funcs, shouldUseFn, nil, nil) if bgErr != nil { xlog.Error("WebSocket Responses: processing failed", "response_id", responseID, 
"error", bgErr) now := time.Now().Unix() diff --git a/core/http/react-ui/package-lock.json b/core/http/react-ui/package-lock.json index ca5f6c1e8..9162f53fd 100644 --- a/core/http/react-ui/package-lock.json +++ b/core/http/react-ui/package-lock.json @@ -9,6 +9,8 @@ "version": "0.0.1", "dependencies": { "@fortawesome/fontawesome-free": "^6.7.2", + "@modelcontextprotocol/ext-apps": "^1.2.2", + "@modelcontextprotocol/sdk": "^1.25.1", "dompurify": "^3.2.5", "highlight.js": "^11.11.1", "marked": "^15.0.7", @@ -859,6 +861,17 @@ "node": ">=6" } }, + "node_modules/@hono/node-server": { + "version": "1.19.11", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.11.tgz", + "integrity": "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g==", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -952,6 +965,87 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@modelcontextprotocol/ext-apps": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/ext-apps/-/ext-apps-1.2.2.tgz", + "integrity": "sha512-qMnhIKb8tyPesl+kZU76Xz9Bi9putCO+LcgvBJ00fDdIniiLZsnQbAeTKoq+sTiYH1rba2Fvj8NPAFxij+gyxw==", + "engines": { + "node": ">=20" + }, + "peerDependencies": { + "@modelcontextprotocol/sdk": "^1.24.0", + "react": "^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^17.0.0 || ^18.0.0 || ^19.0.0", + "zod": "^3.25.0 || ^4.0.0" + }, + "peerDependenciesMeta": { + "react": { + "optional": true + }, + "react-dom": { + "optional": true + } + } + }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.27.1", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.27.1.tgz", + "integrity": "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA==", + 
"dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.2.1", + "express-rate-limit": "^8.2.1", + "hono": "^4.11.4", + "jose": "^6.1.3", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/ajv": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", + "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-beta.27", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz", @@ -1362,6 +1456,18 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": 
"sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/acorn": { "version": "8.16.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", @@ -1399,6 +1505,42 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-formats/node_modules/ajv": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.18.0.tgz", + "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, "node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -1438,6 +1580,29 @@ "node": ">=6.0.0" } }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": 
"sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -1481,6 +1646,41 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -1550,6 +1750,26 @@ "integrity": 
"sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", "dev": true }, + "node_modules/content-disposition": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", + "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -1568,11 +1788,34 @@ "url": "https://opencollective.com/express" } }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.6", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", + "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": 
"sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -1586,7 +1829,6 @@ "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "dev": true, "dependencies": { "ms": "^2.1.3" }, @@ -1605,6 +1847,14 @@ "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", "dev": true }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/dompurify": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.2.tgz", @@ -1616,12 +1866,65 @@ "@types/trusted-types": "^2.0.7" } }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" + }, "node_modules/electron-to-chromium": { "version": "1.5.307", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.307.tgz", "integrity": "sha512-5z3uFKBWjiNR44nFcYdkcXjKMbg5KXNdciu7mhTPo9tB7NbqSNP2sSnGR+fqknZSCwKkBN+oxiiajWs4dT6ORg==", "dev": true }, + 
"node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/esbuild": { "version": "0.25.12", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz", @@ -1672,6 +1975,11 @@ "node": ">=6" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -1851,11 +2159,104 @@ "node": ">=0.10.0" } }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": 
"sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "8.3.1", + "resolved": 
"https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz", + "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==", + "dependencies": { + "ip-address": "10.1.0" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/express/node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" }, "node_modules/fast-json-stable-stringify": { "version": "2.1.0", @@ -1869,6 +2270,21 @@ "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", "dev": true }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ] + }, "node_modules/fdir": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", @@ -1898,6 +2314,26 @@ "node": ">=16.0.0" } }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": 
"https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/find-up": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", @@ -1933,6 +2369,22 @@ "integrity": "sha512-3+mMldrTAPdta5kjX2G2J7iX4zxtnwpdA8Tr2ZSjkyPSanvbZAcy6flmtnXbEybHrDcU9641lxrMfFuUxVz9vA==", "dev": true }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -1947,6 +2399,14 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", @@ -1956,6 +2416,41 @@ "node": 
">=6.9.0" } }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", @@ -1980,6 +2475,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -1989,6 +2495,28 @@ "node": ">=8" } }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": 
"sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/highlight.js": { "version": "11.11.1", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz", @@ -1997,6 +2525,48 @@ "node": ">=12.0.0" } }, + "node_modules/hono": { + "version": "4.12.7", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.7.tgz", + "integrity": "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw==", + "engines": { + "node": ">=16.9.0" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/ignore": { 
"version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -2031,6 +2601,27 @@ "node": ">=0.8.19" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "engines": { + "node": ">= 12" + } + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -2052,11 +2643,23 @@ "node": ">=0.10.0" } }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==" + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" + }, + "node_modules/jose": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.1.tgz", + "integrity": "sha512-jUaKr1yrbfaImV7R2TN/b3IcZzsw38/chqMpo2XJ7i2F8AfM/lA4G1goC3JVEwg0H7UldTmSt3P68nt31W7/mw==", + "funding": { + "url": 
"https://github.com/sponsors/panva" + } }, "node_modules/js-tokens": { "version": "4.0.0", @@ -2100,6 +2703,11 @@ "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", "dev": true }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==" + }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", @@ -2181,6 +2789,56 @@ "node": ">= 18" } }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "engines": { + "node": 
">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/minimatch": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", @@ -2196,8 +2854,7 @@ "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/nanoid": { "version": "3.3.11", @@ -2223,12 +2880,58 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/node-releases": { "version": "2.0.36", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", "integrity": "sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==", "dev": true }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "engines": { + "node": ">=0.10.0" + } + }, + 
"node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -2288,6 +2991,14 @@ "node": ">=6" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -2301,11 +3012,19 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, "engines": { "node": ">=8" } }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": 
"sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -2324,6 +3043,14 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "engines": { + "node": ">=16.20.0" + } + }, "node_modules/postcss": { "version": "8.5.8", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", @@ -2361,6 +3088,18 @@ "node": ">= 0.8.0" } }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -2370,6 +3109,42 @@ "node": ">=6" } }, + "node_modules/qs": { + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.0.tgz", + "integrity": "sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": 
"sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/react": { "version": "19.2.4", "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", @@ -2434,6 +3209,14 @@ "react-dom": ">=18" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -2487,6 +3270,26 @@ "fsevents": "~2.3.2" } }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": 
"https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -2501,16 +3304,63 @@ "semver": "bin/semver.js" } }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/set-cookie-parser": { "version": "2.7.2", "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz", "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==" }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": 
"sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, "dependencies": { "shebang-regex": "^3.0.0" }, @@ -2522,11 +3372,78 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, "engines": { "node": ">=8" } }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + 
"version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -2536,6 +3453,14 @@ "node": ">=0.10.0" } }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", @@ -2576,6 +3501,14 @@ "url": "https://github.com/sponsors/SuperchupuDev" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "engines": { + "node": ">=0.6" + } + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", @@ -2588,6 +3521,27 @@ "node": ">= 0.8.0" } }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": 
"^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/update-browserslist-db": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", @@ -2627,6 +3581,14 @@ "punycode": "^2.1.0" } }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/vite": { "version": "6.4.1", "resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz", @@ -2705,7 +3667,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, "dependencies": { "isexe": "^2.0.0" }, @@ -2725,6 +3686,11 @@ "node": ">=0.10.0" } }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, "node_modules/yallist": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", @@ -2742,6 +3708,22 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + 
"node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "peerDependencies": { + "zod": "^3.25 || ^4" + } } } } diff --git a/core/http/react-ui/package.json b/core/http/react-ui/package.json index 10fbdbb6e..185659e54 100644 --- a/core/http/react-ui/package.json +++ b/core/http/react-ui/package.json @@ -16,7 +16,9 @@ "highlight.js": "^11.11.1", "marked": "^15.0.7", "dompurify": "^3.2.5", - "@fortawesome/fontawesome-free": "^6.7.2" + "@fortawesome/fontawesome-free": "^6.7.2", + "@modelcontextprotocol/sdk": "^1.25.1", + "@modelcontextprotocol/ext-apps": "^1.2.2" }, "devDependencies": { "@vitejs/plugin-react": "^4.5.2", diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css index 0e9a4a1e2..e7bbdffe9 100644 --- a/core/http/react-ui/src/App.css +++ b/core/http/react-ui/src/App.css @@ -16,6 +16,10 @@ transition: margin-left var(--duration-normal) var(--ease-default); } +.sidebar-is-collapsed .main-content { + margin-left: var(--sidebar-width-collapsed); +} + .main-content-inner { flex: 1; display: flex; @@ -136,7 +140,8 @@ z-index: 50; overflow-y: auto; box-shadow: var(--shadow-sidebar); - transition: transform var(--duration-normal) var(--ease-default); + transition: width var(--duration-normal) var(--ease-default), + transform var(--duration-normal) var(--ease-default); } .sidebar-overlay { @@ -147,8 +152,9 @@ display: flex; align-items: center; justify-content: space-between; - padding: var(--spacing-md); + padding: var(--spacing-sm) var(--spacing-sm); border-bottom: 1px solid var(--color-border-subtle); + min-height: 44px; } .sidebar-logo-link { @@ -157,11 +163,20 @@ .sidebar-logo-img { width: 100%; - max-width: 140px; + max-width: 120px; height: auto; padding: 0 var(--spacing-xs); } +.sidebar-logo-icon { + display: none; +} + 
+.sidebar-logo-icon-img { + width: 28px; + height: 28px; +} + .sidebar-close-btn { display: none; background: none; @@ -173,33 +188,37 @@ .sidebar-nav { flex: 1; - padding: var(--spacing-xs) 0; + padding: 2px 0; overflow-y: auto; } .sidebar-section { - padding: var(--spacing-xs) 0; + padding: 2px 0; } .sidebar-section-title { - padding: var(--spacing-sm) var(--spacing-md) var(--spacing-xs); - font-size: 0.6875rem; + padding: var(--spacing-xs) var(--spacing-sm) 2px; + font-size: 0.625rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--color-text-muted); + white-space: nowrap; + overflow: hidden; } .nav-item { display: flex; align-items: center; gap: var(--spacing-sm); - padding: var(--spacing-sm) var(--spacing-md); + padding: 6px var(--spacing-sm); color: var(--color-text-secondary); text-decoration: none; - font-size: 0.875rem; + font-size: 0.8125rem; transition: all var(--duration-fast) var(--ease-default); border-left: 3px solid transparent; + white-space: nowrap; + overflow: hidden; } .nav-item:hover { @@ -215,17 +234,100 @@ } .nav-icon { - width: 20px; + width: 18px; text-align: center; + flex-shrink: 0; + font-size: 0.85rem; } .nav-label { flex: 1; + overflow: hidden; + text-overflow: ellipsis; +} + +.nav-external { + font-size: 0.55rem; + margin-left: auto; + opacity: 0.5; + flex-shrink: 0; } .sidebar-footer { - padding: var(--spacing-sm) var(--spacing-md); + padding: var(--spacing-xs) var(--spacing-sm); border-top: 1px solid var(--color-border-subtle); + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--spacing-xs); +} + +.sidebar-collapse-btn { + background: none; + border: none; + color: var(--color-text-muted); + cursor: pointer; + padding: 4px; + border-radius: var(--radius-sm); + font-size: 0.75rem; + transition: color var(--duration-fast); + flex-shrink: 0; +} + +.sidebar-collapse-btn:hover { + color: var(--color-text-primary); +} + +/* Collapsed sidebar (desktop only) */ 
+.sidebar.collapsed { + width: var(--sidebar-width-collapsed); +} + +.sidebar.collapsed .sidebar-logo-link { + display: none; +} + +.sidebar.collapsed .sidebar-logo-icon { + display: flex; + align-items: center; + justify-content: center; + width: 100%; +} + +.sidebar.collapsed .sidebar-header { + justify-content: center; +} + +.sidebar.collapsed .nav-label, +.sidebar.collapsed .nav-external, +.sidebar.collapsed .sidebar-section-title { + display: none; +} + +.sidebar.collapsed .nav-item { + justify-content: center; + padding: 8px 0; + border-left-width: 2px; +} + +.sidebar.collapsed .nav-icon { + width: auto; + font-size: 1rem; +} + +.sidebar.collapsed .sidebar-footer { + justify-content: center; + flex-direction: column; + gap: var(--spacing-xs); +} + +.sidebar.collapsed .theme-toggle { + padding: 4px; + font-size: 0.75rem; +} + +.sidebar.collapsed .theme-toggle .nav-label { + display: none; } /* Theme toggle */ @@ -1696,19 +1798,129 @@ border-color: var(--color-primary-border); color: var(--color-primary); } -/* Chat MCP toggle switch */ -.chat-mcp-switch { +/* Chat MCP dropdown */ +.chat-mcp-dropdown { + position: relative; + display: inline-block; +} +.chat-mcp-dropdown .btn { display: flex; align-items: center; - gap: 6px; - cursor: pointer; - user-select: none; + gap: 5px; } -.chat-mcp-switch-label { - font-size: 0.75rem; - font-weight: 500; +.chat-mcp-badge { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 18px; + height: 18px; + padding: 0 5px; + border-radius: 9px; + background: rgba(255,255,255,0.25); + font-size: 0.7rem; + font-weight: 600; + line-height: 1; +} +.chat-mcp-dropdown-menu { + position: absolute; + top: calc(100% + 4px); + right: 0; + z-index: 100; + min-width: 240px; + max-height: 320px; + overflow-y: auto; + background: var(--color-bg-primary); + border: 1px solid var(--color-border-subtle); + border-radius: var(--radius-md); + box-shadow: var(--shadow-lg); +} +.chat-mcp-dropdown-loading, 
+.chat-mcp-dropdown-empty { + padding: var(--spacing-sm) var(--spacing-md); + font-size: 0.8125rem; color: var(--color-text-secondary); } +.chat-mcp-dropdown-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: var(--spacing-xs) var(--spacing-md); + border-bottom: 1px solid var(--color-border-divider); + font-size: 0.75rem; + font-weight: 600; + color: var(--color-text-secondary); + text-transform: uppercase; + letter-spacing: 0.03em; +} +.chat-mcp-select-all { + background: none; + border: none; + padding: 0; + font-size: 0.75rem; + color: var(--color-accent); + cursor: pointer; + text-transform: none; + letter-spacing: 0; +} +.chat-mcp-select-all:hover { + text-decoration: underline; +} +.chat-mcp-server-item { + display: flex; + align-items: center; + gap: 8px; + padding: var(--spacing-xs) var(--spacing-md); + cursor: pointer; + transition: background 120ms; +} +.chat-mcp-server-item:hover { + background: var(--color-bg-hover); +} +.chat-mcp-server-item input[type="checkbox"] { + flex-shrink: 0; +} +.chat-mcp-server-info { + display: flex; + flex-direction: column; + gap: 1px; + min-width: 0; +} +.chat-mcp-server-name { + font-size: 0.8125rem; + font-weight: 500; + color: var(--color-text-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.chat-mcp-server-tools { + font-size: 0.7rem; + color: var(--color-text-tertiary); +} + +/* Client MCP status indicators */ +.chat-client-mcp-status { + display: inline-block; + width: 8px; + height: 8px; + border-radius: 50%; + flex-shrink: 0; + background: var(--color-text-tertiary); +} +.chat-client-mcp-status-connected { + background: #22c55e; + box-shadow: 0 0 4px rgba(34, 197, 94, 0.5); +} +.chat-client-mcp-status-connecting { + background: #f59e0b; + animation: pulse 1s infinite; +} +.chat-client-mcp-status-error { + background: #ef4444; +} +.chat-client-mcp-status-disconnected { + background: var(--color-text-tertiary); +} /* Chat model info panel 
*/ .chat-model-info-panel { @@ -2035,7 +2247,8 @@ /* Responsive */ @media (max-width: 1023px) { - .main-content { + .main-content, + .sidebar-is-collapsed .main-content { margin-left: 0; } @@ -2045,6 +2258,11 @@ .sidebar { transform: translateX(-100%); + width: var(--sidebar-width); + } + + .sidebar.collapsed { + width: var(--sidebar-width); } .sidebar.open { @@ -2055,6 +2273,39 @@ display: block; } + .sidebar-collapse-btn { + display: none; + } + + .sidebar.collapsed .nav-label, + .sidebar.collapsed .nav-external, + .sidebar.collapsed .sidebar-section-title { + display: unset; + } + + .sidebar.collapsed .sidebar-logo-link { + display: block; + } + + .sidebar.collapsed .sidebar-logo-icon { + display: none; + } + + .sidebar.collapsed .nav-item { + justify-content: flex-start; + padding: 6px var(--spacing-sm); + border-left-width: 3px; + } + + .sidebar.collapsed .nav-icon { + width: 18px; + font-size: 0.85rem; + } + + .sidebar.collapsed .sidebar-header { + justify-content: space-between; + } + .sidebar-overlay { display: block; position: fixed; @@ -2388,3 +2639,37 @@ gap: var(--spacing-xs); } } + +/* MCP App Frame */ +.mcp-app-frame-container { + width: 100%; + margin: var(--spacing-sm) 0; + border-radius: var(--border-radius-md); + overflow: hidden; + border: 1px solid var(--color-border-subtle); +} + +.mcp-app-iframe { + width: 100%; + border: none; + display: block; + min-height: 100px; + max-height: 600px; + transition: height 0.2s ease; + background: var(--color-bg-primary); +} + +.mcp-app-error { + padding: var(--spacing-sm) var(--spacing-md); + color: var(--color-text-danger, #e53e3e); + font-size: 0.85rem; +} + +.mcp-app-reconnect-overlay { + padding: var(--spacing-sm); + text-align: center; + font-size: 0.8rem; + color: var(--color-text-secondary); + background: var(--color-bg-secondary); + border-top: 1px solid var(--color-border-subtle); +} diff --git a/core/http/react-ui/src/App.jsx b/core/http/react-ui/src/App.jsx index 267c031cb..6320cd780 100644 --- 
a/core/http/react-ui/src/App.jsx +++ b/core/http/react-ui/src/App.jsx @@ -5,8 +5,13 @@ import OperationsBar from './components/OperationsBar' import { ToastContainer, useToast } from './components/Toast' import { systemApi } from './utils/api' +const COLLAPSED_KEY = 'localai_sidebar_collapsed' + export default function App() { const [sidebarOpen, setSidebarOpen] = useState(false) + const [sidebarCollapsed, setSidebarCollapsed] = useState(() => { + try { return localStorage.getItem(COLLAPSED_KEY) === 'true' } catch (_) { return false } + }) const { toasts, addToast, removeToast } = useToast() const [version, setVersion] = useState('') const location = useLocation() @@ -18,8 +23,20 @@ export default function App() { .catch(() => {}) }, []) + useEffect(() => { + const handler = (e) => setSidebarCollapsed(e.detail.collapsed) + window.addEventListener('sidebar-collapse', handler) + return () => window.removeEventListener('sidebar-collapse', handler) + }, []) + + const layoutClasses = [ + 'app-layout', + isChatRoute ? 'app-layout-chat' : '', + sidebarCollapsed ? 'sidebar-is-collapsed' : '', + ].filter(Boolean).join(' ') + return ( -
+
setSidebarOpen(false)} />
diff --git a/core/http/react-ui/src/components/ClientMCPDropdown.jsx b/core/http/react-ui/src/components/ClientMCPDropdown.jsx new file mode 100644 index 000000000..da26fecfb --- /dev/null +++ b/core/http/react-ui/src/components/ClientMCPDropdown.jsx @@ -0,0 +1,154 @@ +import { useState, useEffect, useRef, useCallback } from 'react' +import { loadClientMCPServers, addClientMCPServer, removeClientMCPServer } from '../utils/mcpClientStorage' + +export default function ClientMCPDropdown({ + activeServerIds = [], + onToggleServer, + onServerAdded, + onServerRemoved, + connectionStatuses = {}, + getConnectedTools, +}) { + const [open, setOpen] = useState(false) + const [addDialog, setAddDialog] = useState(false) + const [servers, setServers] = useState(() => loadClientMCPServers()) + const [url, setUrl] = useState('') + const [name, setName] = useState('') + const [authToken, setAuthToken] = useState('') + const [useProxy, setUseProxy] = useState(true) + const ref = useRef(null) + + useEffect(() => { + if (!open) return + const handleClick = (e) => { + if (ref.current && !ref.current.contains(e.target)) setOpen(false) + } + document.addEventListener('mousedown', handleClick) + return () => document.removeEventListener('mousedown', handleClick) + }, [open]) + + const handleAdd = useCallback(() => { + if (!url.trim()) return + const headers = {} + if (authToken.trim()) { + headers.Authorization = `Bearer ${authToken.trim()}` + } + const server = addClientMCPServer({ name: name.trim() || undefined, url: url.trim(), headers, useProxy }) + setServers(loadClientMCPServers()) + setUrl('') + setName('') + setAuthToken('') + setUseProxy(true) + setAddDialog(false) + if (onServerAdded) onServerAdded(server) + }, [url, name, authToken, useProxy, onServerAdded]) + + const handleRemove = useCallback((id) => { + removeClientMCPServer(id) + setServers(loadClientMCPServers()) + if (onServerRemoved) onServerRemoved(id) + }, [onServerRemoved]) + + const activeCount = 
activeServerIds.length + + return ( +
+ + {open && ( +
+
+ Client MCP Servers + +
+ {addDialog && ( +
+ setUrl(e.target.value)} + style={{ width: '100%', marginBottom: '4px' }} + /> + setName(e.target.value)} + style={{ width: '100%', marginBottom: '4px' }} + /> + setAuthToken(e.target.value)} + style={{ width: '100%', marginBottom: '4px' }} + /> + +
+ + +
+
+ )} + {servers.length === 0 && !addDialog ? ( +
No client MCP servers configured
+ ) : ( + servers.map(server => { + const status = connectionStatuses[server.id]?.status || 'disconnected' + const isActive = activeServerIds.includes(server.id) + const connTools = getConnectedTools?.().find(c => c.serverId === server.id) + return ( + + ) + }) + )} +
+ )} +
+ ) +} diff --git a/core/http/react-ui/src/components/MCPAppFrame.jsx b/core/http/react-ui/src/components/MCPAppFrame.jsx new file mode 100644 index 000000000..f989fbe72 --- /dev/null +++ b/core/http/react-ui/src/components/MCPAppFrame.jsx @@ -0,0 +1,104 @@ +import { useRef, useEffect, useState, useCallback } from 'react' +import { AppBridge, PostMessageTransport, buildAllowAttribute } from '@modelcontextprotocol/ext-apps/app-bridge' + +export default function MCPAppFrame({ toolName, toolInput, toolResult, mcpClient, toolDefinition: _toolDefinition, appHtml, resourceMeta }) { + const iframeRef = useRef(null) + const bridgeRef = useRef(null) + const [iframeHeight, setIframeHeight] = useState(200) + const [error, setError] = useState(null) + const initializedRef = useRef(false) + + const setupBridge = useCallback(async () => { + if (!mcpClient || !iframeRef.current || initializedRef.current) return + + const iframe = iframeRef.current + initializedRef.current = true + + try { + const transport = new PostMessageTransport(iframe.contentWindow, iframe.contentWindow) + const bridge = new AppBridge( + mcpClient, + { name: 'LocalAI', version: '1.0.0' }, + { openLinks: {}, serverTools: {}, serverResources: {}, logging: {} }, + { hostContext: { displayMode: 'inline' } } + ) + + bridge.oninitialized = () => { + if (toolInput) bridge.sendToolInput({ arguments: toolInput }) + if (toolResult) bridge.sendToolResult(toolResult) + } + + bridge.onsizechange = ({ height }) => { + if (height && height > 0) setIframeHeight(Math.min(height, 600)) + } + + bridge.onopenlink = async ({ url }) => { + window.open(url, '_blank', 'noopener,noreferrer') + return {} + } + + bridge.onmessage = async () => { + return {} + } + + bridge.onrequestdisplaymode = async () => { + return { mode: 'inline' } + } + + await bridge.connect(transport) + bridgeRef.current = bridge + } catch (err) { + setError(`Bridge error: ${err.message}`) + } + }, [mcpClient, toolInput, toolResult]) + + const handleIframeLoad 
= useCallback(() => { + setupBridge() + }, [setupBridge]) + + // Send toolResult when it arrives after initialization + useEffect(() => { + if (bridgeRef.current && toolResult && initializedRef.current) { + bridgeRef.current.sendToolResult(toolResult) + } + }, [toolResult]) + + // Cleanup on unmount — only close the local transport, don't send + // teardownResource which would kill server-side state and cause + // "Connection closed" errors if the component remounts (e.g. when + // streaming ends and ActivityGroup takes over from StreamingActivity). + useEffect(() => { + return () => { + const bridge = bridgeRef.current + if (bridge) { + try { bridge.close() } catch (_) { /* ignore */ } + } + } + }, []) + + if (!appHtml) return null + + const permissions = resourceMeta?.permissions + const allowAttr = permissions ? buildAllowAttribute(permissions) : undefined + + return ( +
+