Compare commits


12 Commits

Author SHA1 Message Date
Ettore Di Giacinto
61a6e95f7d Additional thinking tags
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 12:02:35 +01:00
Ettore Di Giacinto
a352125726 chore: refactorings
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 11:48:00 +01:00
Ettore Di Giacinto
187e474daf fix(reasoning): handle only closing tags
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 11:40:29 +01:00
Ettore Di Giacinto
4bf2f8bbd8 chore(docs): update docs with Anthropic API and openresponses
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-20 09:25:24 +01:00
LocalAI [bot]
d3525b7509 chore: ⬆️ Update ggml-org/llama.cpp to 959ecf7f234dc0bc0cd6829b25cb0ee1481aa78a (#8122)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-19 22:50:47 +01:00
LocalAI [bot]
c8aa821e0e chore: ⬆️ Update leejet/stable-diffusion.cpp to a48b4a3ade9972faf0adcad47e51c6fc03f0e46d (#8121)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-19 22:27:46 +01:00
dependabot[bot]
b3191927ae chore(deps): bump github.com/mudler/cogito from 0.7.2 to 0.8.1 (#8124)
Bumps [github.com/mudler/cogito](https://github.com/mudler/cogito) from 0.7.2 to 0.8.1.
- [Release notes](https://github.com/mudler/cogito/releases)
- [Commits](https://github.com/mudler/cogito/compare/v0.7.2...v0.8.1)

---
updated-dependencies:
- dependency-name: github.com/mudler/cogito
  dependency-version: 0.8.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-19 22:26:26 +01:00
LocalAI [bot]
54c5a2d9ea docs: ⬆️ update docs version mudler/LocalAI (#8120)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-19 21:18:24 +00:00
Ettore Di Giacinto
0279591fec Enable reranking for Qwen3-VL-Reranker-8B
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2026-01-19 15:28:58 +01:00
LocalAI [bot]
8845186955 chore: ⬆️ Update leejet/stable-diffusion.cpp to 2efd19978dd4164e387bf226025c9666b6ef35e2 (#8099)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-18 22:40:35 +01:00
LocalAI [bot]
ab8ed24358 chore: ⬆️ Update ggml-org/llama.cpp to 287a33017b32600bfc0e81feeb0ad6e81e0dd484 (#8100)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-18 22:40:14 +01:00
LocalAI [bot]
a021df5a88 feat(swagger): update swagger (#8098)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-18 22:10:06 +01:00
20 changed files with 2677 additions and 395 deletions


@@ -1,5 +1,5 @@
-LLAMA_VERSION?=2fbde785bc106ae1c4102b0e82b9b41d9c466579
+LLAMA_VERSION?=959ecf7f234dc0bc0cd6829b25cb0ee1481aa78a
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=


@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=9565c7f6bd5fcff124c589147b2621244f2c4aa1
+STABLEDIFFUSION_GGML_VERSION?=a48b4a3ade9972faf0adcad47e51c6fc03f0e46d
CMAKE_ARGS+=-DGGML_MAX_NAME=128


@@ -10,6 +10,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/cogito"
"gopkg.in/yaml.v3"
)
@@ -51,6 +52,7 @@ type ModelConfig struct {
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
+ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)


@@ -13,6 +13,7 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
@@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
lastEmittedReasoning := ""
lastEmittedCleanedContent := ""
+// Configure reasoning extraction options
+// Auto-detect if prompt ends with thinking tag
+// or use explicit config setting
+thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
accumulatedContent += s
// Extract reasoning from accumulated content
-currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
+opts := []reasoning.Option{}
+if thinkingForcedOpen {
+opts = append(opts, reasoning.WithThinkingForcedOpen())
+}
+currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
// Calculate new reasoning delta (what we haven't emitted yet)
var reasoningDelta *string
@@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return err
}
// Extract reasoning before processing tool calls
-reasoning, cleanedResult := functions.ExtractReasoning(result)
+// Auto-detect if prompt ends with thinking tag or use explicit config
+toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
+opts := []reasoning.Option{}
+if toolsThinkingForcedOpen {
+opts = append(opts, reasoning.WithThinkingForcedOpen())
+}
+extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
result = cleanedResult
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
var deltaReasoning *string
if reasoning != "" {
deltaReasoning = &reasoning
if extractedReasoning != "" {
deltaReasoning = &extractedReasoning
}
delta := &schema.Message{Content: &result}
if deltaReasoning != nil {
@@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
// no streaming mode
default:
+// Auto-detect if prompt ends with thinking tag for non-streaming mode
+nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
tokenCallback := func(s string, c *[]schema.Choice) {
// Extract reasoning from the response
-reasoning, cleanedS := functions.ExtractReasoning(s)
-s = cleanedS
+var extractedReasoning string
+opts := []reasoning.Option{}
+if nonStreamThinkingForcedOpen {
+opts = append(opts, reasoning.WithThinkingForcedOpen())
+}
+extractedReasoning, s = reasoning.Extract(s, opts...)
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &s}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
return
@@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &result}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &stopReason,
@@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Role: "assistant",
},
}
if reasoning != "" {
toolChoice.Message.Reasoning = &reasoning
if extractedReasoning != "" {
toolChoice.Message.Reasoning = &extractedReasoning
}
for _, ss := range results {
@@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
"arguments": args,
},
}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &functionCallReason,


@@ -72,6 +72,359 @@ You can list all the models available with:
curl http://localhost:8080/v1/models
```
### Anthropic Messages API
LocalAI supports the Anthropic Messages API, which is compatible with Claude clients. This endpoint provides a structured way to send messages and receive responses, with support for tools, streaming, and multimodal content.
**Endpoint:** `POST /v1/messages` or `POST /messages`
**Reference:** https://docs.anthropic.com/claude/reference/messages_post
#### Basic Usage
```bash
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-H "anthropic-version: 2023-06-01" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "Say this is a test!"}
]
}'
```
#### Request Parameters
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `model` | string | Yes | The model identifier |
| `messages` | array | Yes | Array of message objects with `role` and `content` |
| `max_tokens` | integer | Yes | Maximum number of tokens to generate (must be > 0) |
| `system` | string | No | System message to set the assistant's behavior |
| `temperature` | float | No | Sampling temperature (0.0 to 1.0) |
| `top_p` | float | No | Nucleus sampling parameter |
| `top_k` | integer | No | Top-k sampling parameter |
| `stop_sequences` | array | No | Array of strings that will stop generation |
| `stream` | boolean | No | Enable streaming responses |
| `tools` | array | No | Array of tool definitions for function calling |
| `tool_choice` | string/object | No | Tool choice strategy: "auto", "any", "none", or specific tool |
| `metadata` | object | No | Custom metadata to attach to the request |
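For programmatic use, the sketch below sends the basic request above from Go. It is a minimal illustration, not an official client: the endpoint and field names follow the table, and the model name is a placeholder.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Build the request body using the required fields from the table above.
	body, _ := json.Marshal(map[string]any{
		"model":      "ggml-koala-7b-model-q4_0-r2.bin", // placeholder model name
		"max_tokens": 1024,
		"messages": []map[string]any{
			{"role": "user", "content": "Say this is a test!"},
		},
	})

	req, _ := http.NewRequest("POST", "http://localhost:8080/v1/messages", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("anthropic-version", "2023-06-01")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Print the raw JSON response.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```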
#### Message Format
Messages can contain text or structured content blocks:
```bash
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"max_tokens": 1024,
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What is in this image?"
},
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": "base64_encoded_image_data"
}
}
]
}
]
}'
```
#### Tool Calling
The Anthropic API supports function calling through tools:
```bash
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"max_tokens": 1024,
"tools": [
{
"name": "get_weather",
"description": "Get the current weather",
"input_schema": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state"
}
},
"required": ["location"]
}
}
],
"tool_choice": "auto",
"messages": [
{"role": "user", "content": "What is the weather in San Francisco?"}
]
}'
```
#### Streaming
Enable streaming responses by setting `stream: true`:
```bash
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"max_tokens": 1024,
"stream": true,
"messages": [
{"role": "user", "content": "Tell me a story"}
]
}'
```
Streaming responses use Server-Sent Events (SSE) format with event types: `message_start`, `content_block_start`, `content_block_delta`, `content_block_stop`, `message_delta`, and `message_stop`.
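As a rough sketch, the stream can be consumed from Go by reading the `event:` and `data:` lines directly. This is a minimal reader under those assumptions, not a full SSE parser, and the model name is a placeholder.

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	body := []byte(`{"model":"ggml-koala-7b-model-q4_0-r2.bin","max_tokens":1024,"stream":true,` +
		`"messages":[{"role":"user","content":"Tell me a story"}]}`)

	req, _ := http.NewRequest("POST", "http://localhost:8080/v1/messages", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// SSE frames arrive as "event: <type>" and "data: <json>" lines.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.HasPrefix(line, "event: "):
			fmt.Println("event:", strings.TrimPrefix(line, "event: "))
		case strings.HasPrefix(line, "data: "):
			fmt.Println("data:", strings.TrimPrefix(line, "data: "))
		}
	}
}
```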
#### Response Format
```json
{
"id": "msg_abc123",
"type": "message",
"role": "assistant",
"content": [
{
"type": "text",
"text": "This is a test!"
}
],
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"stop_reason": "end_turn",
"usage": {
"input_tokens": 10,
"output_tokens": 5
}
}
```
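For typed access from Go, a sketch of structs covering only the fields shown above might look like the following; these are illustrative, not official client types.

```go
// Illustrative Go types for the /v1/messages response example above.
type AnthropicResponse struct {
	ID         string         `json:"id"`
	Type       string         `json:"type"`
	Role       string         `json:"role"`
	Content    []ContentBlock `json:"content"`
	Model      string         `json:"model"`
	StopReason string         `json:"stop_reason"`
	Usage      Usage          `json:"usage"`
}

type ContentBlock struct {
	Type string `json:"type"`
	Text string `json:"text"`
}

type Usage struct {
	InputTokens  int `json:"input_tokens"`
	OutputTokens int `json:"output_tokens"`
}
```

Unmarshal the response body into `AnthropicResponse` with `encoding/json` to read the text blocks and token usage.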
### Open Responses API
LocalAI supports the Open Responses API specification, which provides a standardized interface for AI model interactions with support for background processing, streaming, tool calling, and advanced features like reasoning.
**Endpoint:** `POST /v1/responses` or `POST /responses`
**Reference:** https://www.openresponses.org/specification
#### Basic Usage
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"input": "Say this is a test!",
"max_output_tokens": 1024
}'
```
#### Request Parameters
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `model` | string | Yes | The model identifier |
| `input` | string/array | Yes | Input text or array of input items |
| `max_output_tokens` | integer | No | Maximum number of tokens to generate |
| `temperature` | float | No | Sampling temperature |
| `top_p` | float | No | Nucleus sampling parameter |
| `instructions` | string | No | System instructions |
| `tools` | array | No | Array of tool definitions |
| `tool_choice` | string/object | No | Tool choice: "auto", "required", "none", or specific tool |
| `stream` | boolean | No | Enable streaming responses |
| `background` | boolean | No | Run request in background (returns immediately) |
| `store` | boolean | No | Whether to store the response |
| `reasoning` | object | No | Reasoning configuration with `effort` and `summary` |
| `parallel_tool_calls` | boolean | No | Allow parallel tool calls |
| `max_tool_calls` | integer | No | Maximum number of tool calls |
| `presence_penalty` | float | No | Presence penalty (-2.0 to 2.0) |
| `frequency_penalty` | float | No | Frequency penalty (-2.0 to 2.0) |
| `top_logprobs` | integer | No | Number of top logprobs to return |
| `truncation` | string | No | Truncation mode: "auto" or "disabled" |
| `text_format` | object | No | Text format configuration |
| `metadata` | object | No | Custom metadata |
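For typed clients, a rough Go struct covering the most common fields from this table could be sketched as below. This is illustrative only; `any` is used for `input` because it accepts either a string or an array of input items.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// ResponsesRequest is an illustrative subset of the request fields above.
type ResponsesRequest struct {
	Model           string  `json:"model"`
	Input           any     `json:"input"` // string or array of input items
	MaxOutputTokens int     `json:"max_output_tokens,omitempty"`
	Temperature     float64 `json:"temperature,omitempty"`
	Stream          bool    `json:"stream,omitempty"`
	Background      bool    `json:"background,omitempty"`
}

func main() {
	b, _ := json.Marshal(ResponsesRequest{
		Model:           "ggml-koala-7b-model-q4_0-r2.bin", // placeholder
		Input:           "Say this is a test!",
		MaxOutputTokens: 1024,
	})
	fmt.Println(string(b))
}
```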
#### Input Format
Input can be a simple string or an array of structured items:
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"input": [
{
"type": "message",
"role": "user",
"content": "What is the weather?"
}
],
"max_output_tokens": 1024
}'
```
#### Background Processing
Run requests in the background for long-running tasks:
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"input": "Generate a long story",
"max_output_tokens": 4096,
"background": true
}'
```
The response will include a response ID that can be used to poll for completion:
```json
{
"id": "resp_abc123",
"object": "response",
"status": "in_progress",
"created_at": 1234567890
}
```
#### Retrieving Background Responses
Use the GET endpoint to retrieve background responses:
```bash
# Get response by ID
curl http://localhost:8080/v1/responses/resp_abc123
# Resume streaming with query parameters
curl "http://localhost:8080/v1/responses/resp_abc123?stream=true&starting_after=10"
```
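Putting the two steps together, a hedged Go sketch of the background flow (submit, then poll the ID until the status leaves `in_progress`) could look like this. The status values follow the examples above, and the poll interval and model name are arbitrary placeholders.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"time"
)

func main() {
	// Submit a background request.
	body := `{"model":"ggml-koala-7b-model-q4_0-r2.bin","input":"Generate a long story",` +
		`"max_output_tokens":4096,"background":true}`
	resp, err := http.Post("http://localhost:8080/v1/responses", "application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	var created struct {
		ID     string `json:"id"`
		Status string `json:"status"`
	}
	json.NewDecoder(resp.Body).Decode(&created)
	resp.Body.Close()

	// Poll until the response is no longer in progress.
	for created.Status == "in_progress" {
		time.Sleep(2 * time.Second)
		r, err := http.Get("http://localhost:8080/v1/responses/" + created.ID)
		if err != nil {
			panic(err)
		}
		json.NewDecoder(r.Body).Decode(&created)
		r.Body.Close()
	}
	fmt.Println("final status:", created.Status)
}
```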
#### Canceling Background Responses
Cancel a background response that's still in progress:
```bash
curl -X POST http://localhost:8080/v1/responses/resp_abc123/cancel
```
#### Tool Calling
The Open Responses API supports function calling with tools:
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"input": "What is the weather in San Francisco?",
"tools": [
{
"type": "function",
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state"
}
},
"required": ["location"]
}
}
],
"tool_choice": "auto",
"max_output_tokens": 1024
}'
```
#### Reasoning Configuration
Configure reasoning effort and summary style:
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"input": "Solve this complex problem step by step",
"reasoning": {
"effort": "high",
"summary": "detailed"
},
"max_output_tokens": 2048
}'
```
#### Response Format
```json
{
"id": "resp_abc123",
"object": "response",
"created_at": 1234567890,
"completed_at": 1234567895,
"status": "completed",
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"output": [
{
"type": "message",
"id": "msg_001",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "This is a test!",
"annotations": [],
"logprobs": []
}
],
"status": "completed"
}
],
"error": null,
"incomplete_details": null,
"temperature": 0.7,
"top_p": 1.0,
"presence_penalty": 0.0,
"frequency_penalty": 0.0,
"usage": {
"input_tokens": 10,
"output_tokens": 5,
"total_tokens": 15,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens_details": {
"reasoning_tokens": 0
}
}
}
```
## Backends
### RWKV


@@ -112,6 +112,66 @@ curl http://localhost:8080/v1/chat/completions \
</details>
### Anthropic Messages API
LocalAI supports the Anthropic Messages API for Claude-compatible models. [Anthropic documentation](https://docs.anthropic.com/claude/reference/messages_post).
<details>
```bash
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-H "anthropic-version: 2023-06-01" \
-d '{
"model": "gpt-4",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "How are you doing?"}
],
"temperature": 0.7
}'
```
</details>
### Open Responses API
LocalAI supports the Open Responses API specification with support for background processing, streaming, and advanced features. [Open Responses documentation](https://www.openresponses.org/specification).
<details>
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4",
"input": "Say this is a test!",
"max_output_tokens": 1024,
"temperature": 0.7
}'
```
For background processing:
```bash
curl http://localhost:8080/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4",
"input": "Generate a long story",
"max_output_tokens": 4096,
"background": true
}'
```
Then retrieve the response:
```bash
curl http://localhost:8080/v1/responses/<response_id>
```
</details>
### Image Generation
Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).


@@ -1,3 +1,3 @@
{
"version": "v3.9.0"
"version": "v3.10.0"
}


@@ -29,6 +29,7 @@
This description emphasizes its capabilities, efficiency, and versatility for multimodal search tasks.
overrides:
+reranking: true
parameters:
model: llama-cpp/models/Qwen3-VL-Reranker-8B.Q4_K_M.gguf
name: Qwen3-VL-Reranker-8B-GGUF

go.mod

@@ -32,7 +32,7 @@ require (
github.com/mholt/archiver/v3 v3.5.1
github.com/microcosm-cc/bluemonday v1.0.27
github.com/modelcontextprotocol/go-sdk v1.2.0
-github.com/mudler/cogito v0.7.2
+github.com/mudler/cogito v0.8.1
github.com/mudler/edgevpn v0.31.1
github.com/mudler/go-processmanager v0.1.0
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2

go.sum

@@ -507,8 +507,8 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
-github.com/mudler/cogito v0.7.2 h1:J5eHZPsxpoKcnYUfogje5u0nnzGww7ytv7nSn1DMpms=
-github.com/mudler/cogito v0.7.2/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
+github.com/mudler/cogito v0.8.1 h1:66qPJkAMrq/Vo8AC/PvXWuVxYPhi7X2DQuJIilL8+3I=
+github.com/mudler/cogito v0.8.1/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=


@@ -1,114 +0,0 @@
package functions
import (
"strings"
)
// ExtractReasoning extracts reasoning content from thinking tags and returns
// both the extracted reasoning and the cleaned content (with tags removed).
// It handles <thinking>...</thinking> and <think>...</think> tags.
// Multiple reasoning blocks are concatenated with newlines.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
if content == "" {
return "", content
}
var reasoningParts []string
var cleanedParts []string
remaining := content
// Define tag pairs to look for
tagPairs := []struct {
start string
end string
}{
{"<thinking>", "</thinking>"},
{"<think>", "</think>"},
}
// Track the last position we've processed
lastPos := 0
for {
// Find the earliest tag start
earliestStart := -1
earliestEnd := -1
isUnclosed := false
var matchedTag struct {
start string
end string
}
for _, tagPair := range tagPairs {
startIdx := strings.Index(remaining[lastPos:], tagPair.start)
if startIdx == -1 {
continue
}
startIdx += lastPos
// Find the corresponding end tag
endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
if endIdx == -1 {
// Unclosed tag - extract what we have
if earliestStart == -1 || startIdx < earliestStart {
earliestStart = startIdx
earliestEnd = len(remaining)
isUnclosed = true
matchedTag = tagPair
}
continue
}
endIdx += startIdx + len(tagPair.start)
// Found a complete tag pair
if earliestStart == -1 || startIdx < earliestStart {
earliestStart = startIdx
earliestEnd = endIdx + len(tagPair.end)
isUnclosed = false
matchedTag = tagPair
}
}
if earliestStart == -1 {
// No more tags found, add remaining content
if lastPos < len(remaining) {
cleanedParts = append(cleanedParts, remaining[lastPos:])
}
break
}
// Add content before the tag
if earliestStart > lastPos {
cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
}
// Extract reasoning content
reasoningStart := earliestStart + len(matchedTag.start)
// For unclosed tags, earliestEnd is already at the end of the string
// For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length
var reasoningEnd int
if isUnclosed {
// Unclosed tag - extract everything to the end
reasoningEnd = len(remaining)
} else {
// Closed tag - exclude the end tag
reasoningEnd = earliestEnd - len(matchedTag.end)
}
if reasoningEnd > reasoningStart {
reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd])
if reasoningContent != "" {
reasoningParts = append(reasoningParts, reasoningContent)
}
}
// Move past this tag
lastPos = earliestEnd
}
// Combine reasoning parts
reasoning = strings.Join(reasoningParts, "\n\n")
// Combine cleaned content parts
cleanedContent = strings.Join(cleanedParts, "")
return reasoning, cleanedContent
}


@@ -1,261 +0,0 @@
package functions_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/functions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("ExtractReasoning", func() {
Context("when content has no reasoning tags", func() {
It("should return empty reasoning and original content", func() {
content := "This is regular content without any tags."
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
It("should handle empty string", func() {
content := ""
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should handle content with only whitespace", func() {
content := " \n\t "
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
})
Context("when content has <thinking> tags", func() {
It("should extract reasoning from single thinking block", func() {
content := "Some text <thinking>This is my reasoning</thinking> More text"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("This is my reasoning"))
Expect(cleaned).To(Equal("Some text More text"))
})
It("should extract reasoning and preserve surrounding content", func() {
content := "Before <thinking>Reasoning here</thinking> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle thinking block at the start", func() {
content := "<thinking>Start reasoning</thinking> Regular content"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Start reasoning"))
Expect(cleaned).To(Equal(" Regular content"))
})
It("should handle thinking block at the end", func() {
content := "Regular content <thinking>End reasoning</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("End reasoning"))
Expect(cleaned).To(Equal("Regular content "))
})
It("should handle only thinking block", func() {
content := "<thinking>Only reasoning</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Only reasoning"))
Expect(cleaned).To(BeEmpty())
})
It("should trim whitespace from reasoning content", func() {
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with spaces"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has <think> tags", func() {
It("should extract reasoning from redacted_reasoning block", func() {
content := "Text <think>Redacted reasoning</think> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Redacted reasoning"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle redacted_reasoning with multiline content", func() {
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle redacted_reasoning with complex content", func() {
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
Expect(cleaned).To(Equal("Start End"))
})
})
Context("when content has multiple reasoning blocks", func() {
It("should concatenate multiple thinking blocks with newlines", func() {
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("First\n\nSecond"))
Expect(cleaned).To(Equal("Text Middle End"))
})
It("should handle multiple different tag types", func() {
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(ContainSubstring("One"))
Expect(reasoning).To(ContainSubstring("Two"))
Expect(reasoning).To(ContainSubstring("Three"))
Expect(cleaned).To(Equal("A B C D"))
})
It("should handle nested tags correctly (extracts first match)", func() {
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
reasoning, cleaned := ExtractReasoning(content)
// Should extract the outer thinking block
Expect(reasoning).To(ContainSubstring("Outer"))
Expect(reasoning).To(ContainSubstring("Inner"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has unclosed reasoning tags", func() {
It("should extract unclosed thinking block", func() {
content := "Text <thinking>Unclosed reasoning"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Unclosed reasoning"))
Expect(cleaned).To(Equal("Text "))
})
It("should extract unclosed think block", func() {
content := "Before <think>Incomplete"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Incomplete"))
Expect(cleaned).To(Equal("Before "))
})
It("should extract unclosed redacted_reasoning block", func() {
content := "Start <think>Partial reasoning content"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Partial reasoning content"))
Expect(cleaned).To(Equal("Start "))
})
It("should handle unclosed tag at the end", func() {
content := "Regular content <thinking>Unclosed at end"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Unclosed at end"))
Expect(cleaned).To(Equal("Regular content "))
})
})
Context("when content has empty reasoning blocks", func() {
It("should ignore empty thinking block", func() {
content := "Text <thinking></thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
It("should ignore thinking block with only whitespace", func() {
content := "Text <thinking> \n\t </thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has reasoning tags with special characters", func() {
It("should handle reasoning with newlines", func() {
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with code blocks", func() {
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with JSON", func() {
content := "Before <think>{\"key\": \"value\"}</think> After"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with HTML-like content", func() {
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has reasoning mixed with regular content", func() {
It("should preserve content order correctly", func() {
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(ContainSubstring("Reasoning"))
Expect(reasoning).To(ContainSubstring("More reasoning"))
Expect(cleaned).To(Equal("Start Middle End"))
})
It("should handle reasoning in the middle of a sentence", func() {
content := "This is a <thinking>reasoning</thinking> sentence."
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("reasoning"))
Expect(cleaned).To(Equal("This is a sentence."))
})
})
Context("edge cases", func() {
It("should handle content with only opening tag", func() {
content := "<thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(""))
})
It("should handle content with only closing tag", func() {
content := "</thinking>"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("</thinking>"))
})
It("should handle mismatched tags", func() {
content := "<thinking>Content</think>"
reasoning, cleaned := ExtractReasoning(content)
// Should extract unclosed thinking block
Expect(reasoning).To(ContainSubstring("Content"))
Expect(cleaned).To(Equal(""))
})
It("should handle very long reasoning content", func() {
longReasoning := strings.Repeat("This is reasoning content. ", 100)
content := "Text <thinking>" + longReasoning + "</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
// TrimSpace is applied, so we need to account for that
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with unicode characters", func() {
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
reasoning, cleaned := ExtractReasoning(content)
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
Expect(cleaned).To(Equal("Text More"))
})
})
})

pkg/reasoning/config.go

@@ -0,0 +1,8 @@
package reasoning
type ReasoningConfig struct {
// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
// When true, all content from the start is treated as reasoning until a closing tag is found.
// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
}
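For illustration, a hypothetical model configuration enabling this flag might look like the sketch below. The `reasoning` key follows the yaml tag on `ModelConfig.ReasoningConfig` in this PR; the model name and file are placeholders.

```yaml
# Hypothetical model config sketch: enable forced-open thinking extraction
# for a model whose template emits reasoning without an opening <think> tag.
name: glm-4
parameters:
  model: glm-4.gguf   # placeholder model file
reasoning:
  thinking_forced_open: true
```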

pkg/reasoning/options.go

@@ -0,0 +1,18 @@
package reasoning
// options holds the configuration for reasoning extraction
type options struct {
thinkingForcedOpen bool
}
// Option is a functional option for configuring reasoning extraction
type Option func(*options)
// WithThinkingForcedOpen configures the extractor to treat all content from the start
// as reasoning until a closing tag is found. This is useful for models like GLM-4
// that output reasoning without <think> but end with </think>.
func WithThinkingForcedOpen() Option {
return func(o *options) {
o.thinkingForcedOpen = true
}
}
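A small usage sketch of this option, based on the `Extract` API added in this PR:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/reasoning"
)

func main() {
	// Standard extraction: tags are stripped and returned separately.
	r, content := reasoning.Extract("<think>plan the answer</think>Hello!")
	fmt.Printf("reasoning=%q content=%q\n", r, content) // reasoning="plan the answer" content="Hello!"

	// Forced-open mode (e.g. GLM-4): everything before the closing tag is reasoning.
	r, content = reasoning.Extract("plan the answer</think>Hello!", reasoning.WithThinkingForcedOpen())
	fmt.Printf("reasoning=%q content=%q\n", r, content)
}
```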

pkg/reasoning/reasoning.go

@@ -0,0 +1,256 @@
package reasoning
import (
"strings"
)
// Common thinking/reasoning opening tags used by various models.
// These match the tags detected by llama.cpp in common/chat.cpp
var thinkingOpenTags = []string{
// DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE
"<think>\n",
"<think>",
// Generic thinking tags
"<thinking>\n",
"<thinking>",
// Apertus
"<|inner_prefix|>",
// Command R7B
"<|START_THINKING|>",
// Seed
"<seed:think>",
// Magistral (not in llama.cpp but common)
"[THINK]\n",
"[THINK]",
}
// DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag.
// This is used to automatically detect when the model template has already added
// the opening thinking tag, meaning the model will output reasoning content directly.
// Returns true if the prompt ends with a known thinking opening tag.
func DetectThinkingForcedOpen(prompt string) bool {
for _, tag := range thinkingOpenTags {
if strings.HasSuffix(prompt, tag) {
return true
}
}
return false
}
// Extract extracts reasoning content from thinking tags and returns
// both the extracted reasoning and the cleaned content (with tags removed).
// It handles <thinking>...</thinking> and <think>...</think> tags.
// Multiple reasoning blocks are concatenated with newlines.
// It also handles the case where only a closing tag is present (no opening tag),
// in which case everything before the closing tag is treated as reasoning.
//
// Use WithThinkingForcedOpen() option when all content from the start should be
// treated as reasoning until a closing tag is found.
func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) {
if content == "" {
return "", content
}
cfg := &options{}
for _, opt := range opts {
opt(cfg)
}
if cfg.thinkingForcedOpen {
return extractForcedOpen(content)
}
return extractFromTags(content)
}
// extractForcedOpen handles the case where reasoning starts without an opening tag.
// All content from the start is treated as reasoning until a closing tag is found.
func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
// Look for the earliest closing tag
// These match the closing tags used by llama.cpp for various models
closingTags := []string{
"</thinking>",
"</think>",
"<|END_THINKING|>", // Command R7B
"<|inner_suffix|>", // Apertus
"</seed:think>", // Seed
"[/THINK]", // Magistral
}
earliestCloseIdx := -1
var matchedCloseTag string
for _, closeTag := range closingTags {
idx := strings.Index(content, closeTag)
if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) {
earliestCloseIdx = idx
matchedCloseTag = closeTag
}
}
if earliestCloseIdx == -1 {
// No closing tag found - all content is reasoning (still streaming)
return strings.TrimSpace(content), ""
}
// Found closing tag - everything before is reasoning, everything after is content
reasoning = strings.TrimSpace(content[:earliestCloseIdx])
cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):]
// Continue processing the rest for any additional reasoning blocks
if cleanedContent != "" {
additionalReasoning, finalContent := extractFromTags(cleanedContent)
if additionalReasoning != "" {
if reasoning != "" {
reasoning = reasoning + "\n\n" + additionalReasoning
} else {
reasoning = additionalReasoning
}
}
cleanedContent = finalContent
}
return reasoning, cleanedContent
}
// extractFromTags extracts reasoning content from thinking tags.
// This is the core implementation that handles standard tag-based extraction.
func extractFromTags(content string) (reasoning string, cleanedContent string) {
if content == "" {
return "", content
}
var reasoningParts []string
var cleanedParts []string
remaining := content
// Define tag pairs to look for
// These match the tags used by llama.cpp for various models
tagPairs := []struct {
start string
end string
}{
{"<thinking>", "</thinking>"},
{"<think>", "</think>"},
{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B
{"<|inner_prefix|>", "<|inner_suffix|>"}, // Apertus
{"<seed:think>", "</seed:think>"}, // Seed
{"[THINK]", "[/THINK]"}, // Magistral
}
// Track the last position we've processed
lastPos := 0
for {
// Find the earliest tag start
earliestStart := -1
earliestEnd := -1
isUnclosed := false
isClosingOnly := false
var matchedTag struct {
start string
end string
}
for _, tagPair := range tagPairs {
startIdx := strings.Index(remaining[lastPos:], tagPair.start)
endIdx := strings.Index(remaining[lastPos:], tagPair.end)
// Check for closing-only tag (closing tag appears before or without opening tag)
if endIdx != -1 && (startIdx == -1 || endIdx < startIdx) {
// Found a closing tag without a preceding opening tag
closingTagPos := endIdx + lastPos
if earliestStart == -1 || closingTagPos < earliestStart || (isClosingOnly && closingTagPos < earliestEnd) {
earliestStart = lastPos
earliestEnd = closingTagPos + len(tagPair.end)
isClosingOnly = true
isUnclosed = false
matchedTag = tagPair
}
continue
}
if startIdx == -1 {
continue
}
startIdx += lastPos
// Find the corresponding end tag after the start tag
endIdxAfterStart := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
if endIdxAfterStart == -1 {
// Unclosed tag - extract what we have
if earliestStart == -1 || startIdx < earliestStart {
earliestStart = startIdx
earliestEnd = len(remaining)
isUnclosed = true
isClosingOnly = false
matchedTag = tagPair
}
continue
}
endIdxAfterStart += startIdx + len(tagPair.start)
// Found a complete tag pair
if earliestStart == -1 || startIdx < earliestStart {
earliestStart = startIdx
earliestEnd = endIdxAfterStart + len(tagPair.end)
isUnclosed = false
isClosingOnly = false
matchedTag = tagPair
}
}
if earliestStart == -1 {
// No more tags found, add remaining content
if lastPos < len(remaining) {
cleanedParts = append(cleanedParts, remaining[lastPos:])
}
break
}
if isClosingOnly {
// Closing tag without opening tag - content before closing tag is reasoning
reasoningContent := strings.TrimSpace(remaining[lastPos : earliestEnd-len(matchedTag.end)])
if reasoningContent != "" {
reasoningParts = append(reasoningParts, reasoningContent)
}
// Move past the closing tag
lastPos = earliestEnd
continue
}
// Add content before the tag
if earliestStart > lastPos {
cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
}
// Extract reasoning content
reasoningStart := earliestStart + len(matchedTag.start)
// For unclosed tags, earliestEnd is already at the end of the string
// For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length
var reasoningEnd int
if isUnclosed {
// Unclosed tag - extract everything to the end
reasoningEnd = len(remaining)
} else {
// Closed tag - exclude the end tag
reasoningEnd = earliestEnd - len(matchedTag.end)
}
if reasoningEnd > reasoningStart {
reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd])
if reasoningContent != "" {
reasoningParts = append(reasoningParts, reasoningContent)
}
}
// Move past this tag
lastPos = earliestEnd
}
// Combine reasoning parts
reasoning = strings.Join(reasoningParts, "\n\n")
// Combine cleaned content parts
cleanedContent = strings.Join(cleanedParts, "")
return reasoning, cleanedContent
}


@@ -0,0 +1,13 @@
package reasoning_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestReasoning(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Reasoning Suite")
}


@@ -0,0 +1,499 @@
package reasoning_test
import (
"strings"
. "github.com/mudler/LocalAI/pkg/reasoning"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("DetectThinkingForcedOpen", func() {
It("should detect <think> at end of prompt", func() {
Expect(DetectThinkingForcedOpen("Some prompt<think>")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt<think>\n")).To(BeTrue())
})
It("should detect <thinking> at end of prompt", func() {
Expect(DetectThinkingForcedOpen("Some prompt<thinking>")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt<thinking>\n")).To(BeTrue())
})
It("should detect model-specific tags", func() {
Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt<seed:think>")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue())
Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue())
})
It("should not detect if tag is in the middle", func() {
Expect(DetectThinkingForcedOpen("Some <think> prompt")).To(BeFalse())
Expect(DetectThinkingForcedOpen("<think>reasoning</think>")).To(BeFalse())
})
It("should not detect if no thinking tag", func() {
Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse())
Expect(DetectThinkingForcedOpen("")).To(BeFalse())
})
})
var _ = Describe("Extract", func() {
Context("when content has no reasoning tags", func() {
It("should return empty reasoning and original content", func() {
content := "This is regular content without any tags."
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
It("should handle empty string", func() {
content := ""
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should handle content with only whitespace", func() {
content := " \n\t "
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(content))
})
})
Context("when content has <thinking> tags", func() {
It("should extract reasoning from single thinking block", func() {
content := "Some text <thinking>This is my reasoning</thinking> More text"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("This is my reasoning"))
Expect(cleaned).To(Equal("Some text More text"))
})
It("should extract reasoning and preserve surrounding content", func() {
content := "Before <thinking>Reasoning here</thinking> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle thinking block at the start", func() {
content := "<thinking>Start reasoning</thinking> Regular content"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Start reasoning"))
Expect(cleaned).To(Equal(" Regular content"))
})
It("should handle thinking block at the end", func() {
content := "Regular content <thinking>End reasoning</thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("End reasoning"))
Expect(cleaned).To(Equal("Regular content "))
})
It("should handle only thinking block", func() {
content := "<thinking>Only reasoning</thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Only reasoning"))
Expect(cleaned).To(BeEmpty())
})
It("should trim whitespace from reasoning content", func() {
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning with spaces"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has <think> tags", func() {
It("should extract reasoning from redacted_reasoning block", func() {
content := "Text <think>Redacted reasoning</think> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Redacted reasoning"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle redacted_reasoning with multiline content", func() {
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle redacted_reasoning with complex content", func() {
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
Expect(cleaned).To(Equal("Start End"))
})
})
Context("when content has multiple reasoning blocks", func() {
It("should concatenate multiple thinking blocks with newlines", func() {
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("First\n\nSecond"))
Expect(cleaned).To(Equal("Text Middle End"))
})
It("should handle multiple different tag types", func() {
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(ContainSubstring("One"))
Expect(reasoning).To(ContainSubstring("Two"))
Expect(reasoning).To(ContainSubstring("Three"))
Expect(cleaned).To(Equal("A B C D"))
})
It("should handle nested tags correctly (extracts first match)", func() {
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
reasoning, cleaned := Extract(content)
// Should extract the outer thinking block
Expect(reasoning).To(ContainSubstring("Outer"))
Expect(reasoning).To(ContainSubstring("Inner"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has unclosed reasoning tags", func() {
It("should extract unclosed thinking block", func() {
content := "Text <thinking>Unclosed reasoning"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Unclosed reasoning"))
Expect(cleaned).To(Equal("Text "))
})
It("should extract unclosed think block", func() {
content := "Before <think>Incomplete"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Incomplete"))
Expect(cleaned).To(Equal("Before "))
})
It("should extract unclosed redacted_reasoning block", func() {
content := "Start <think>Partial reasoning content"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Partial reasoning content"))
Expect(cleaned).To(Equal("Start "))
})
It("should handle unclosed tag at the end", func() {
content := "Regular content <thinking>Unclosed at end"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Unclosed at end"))
Expect(cleaned).To(Equal("Regular content "))
})
})
Context("when content has empty reasoning blocks", func() {
It("should ignore empty thinking block", func() {
content := "Text <thinking></thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
It("should ignore thinking block with only whitespace", func() {
content := "Text <thinking> \n\t </thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has reasoning tags with special characters", func() {
It("should handle reasoning with newlines", func() {
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with code blocks", func() {
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with JSON", func() {
content := "Before <think>{\"key\": \"value\"}</think> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle reasoning with HTML-like content", func() {
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("when content has reasoning mixed with regular content", func() {
It("should preserve content order correctly", func() {
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(ContainSubstring("Reasoning"))
Expect(reasoning).To(ContainSubstring("More reasoning"))
Expect(cleaned).To(Equal("Start Middle End"))
})
It("should handle reasoning in the middle of a sentence", func() {
content := "This is a <thinking>reasoning</thinking> sentence."
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning"))
Expect(cleaned).To(Equal("This is a sentence."))
})
})
Context("edge cases without WithThinkingForcedOpen", func() {
It("should handle content with only opening tag", func() {
content := "<thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal(""))
})
It("should handle content with only closing tag (no content before)", func() {
content := "</thinking>"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should extract reasoning when only closing tag is present", func() {
// GLM-4 style: reasoning content followed by closing tag without opening tag
content := "This is reasoning content</think>this is the actual response"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("This is reasoning content"))
Expect(cleaned).To(Equal("this is the actual response"))
})
It("should handle closing-only tag with multiline reasoning", func() {
content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
Expect(cleaned).To(Equal("Final answer"))
})
It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
Expect(cleaned).To(Equal("this is a test"))
})
It("should handle closing-only thinking tag", func() {
content := "Some reasoning here</thinking>actual content"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Some reasoning here"))
Expect(cleaned).To(Equal("actual content"))
})
It("should handle mismatched tags", func() {
content := "<thinking>Content</think>"
reasoning, cleaned := Extract(content)
// Should extract unclosed thinking block
Expect(reasoning).To(ContainSubstring("Content"))
Expect(cleaned).To(Equal(""))
})
It("should handle very long reasoning content", func() {
longReasoning := strings.Repeat("This is reasoning content. ", 100)
content := "Text <thinking>" + longReasoning + "</thinking> More"
reasoning, cleaned := Extract(content)
// TrimSpace is applied, so we need to account for that
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
Expect(cleaned).To(Equal("Text More"))
})
It("should handle reasoning with unicode characters", func() {
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
Expect(cleaned).To(Equal("Text More"))
})
})
Context("with WithThinkingForcedOpen option", func() {
It("should treat all content as reasoning until closing tag", func() {
content := "This is reasoning</think>this is content"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("This is reasoning"))
Expect(cleaned).To(Equal("this is content"))
})
It("should treat all content as reasoning when no closing tag (streaming)", func() {
content := "This is reasoning content still streaming"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("This is reasoning content still streaming"))
Expect(cleaned).To(BeEmpty())
})
It("should handle GLM-4 style output", func() {
content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
Expect(cleaned).To(Equal("this is a test"))
})
It("should handle multiline reasoning with closing tag", func() {
content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
Expect(cleaned).To(Equal("Final answer"))
})
It("should handle </thinking> closing tag", func() {
content := "Some reasoning here</thinking>actual content"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Some reasoning here"))
Expect(cleaned).To(Equal("actual content"))
})
It("should handle additional reasoning blocks after initial forced open", func() {
content := "Initial reasoning</think>content<think>more reasoning</think>final content"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning"))
Expect(cleaned).To(Equal("contentfinal content"))
})
It("should handle empty content", func() {
reasoning, cleaned := Extract("", WithThinkingForcedOpen())
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(BeEmpty())
})
It("should handle only closing tag", func() {
content := "</think>only content"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(BeEmpty())
Expect(cleaned).To(Equal("only content"))
})
It("should find earliest closing tag", func() {
// </think> comes before </thinking>
content := "Reasoning</think>content</thinking>more"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Reasoning"))
Expect(cleaned).To(Equal("content</thinking>more"))
})
It("should handle Command R7B closing tag", func() {
content := "Reasoning content<|END_THINKING|>actual response"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle Apertus closing tag", func() {
content := "Reasoning content<|inner_suffix|>actual response"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle Seed closing tag", func() {
content := "Reasoning content</seed:think>actual response"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle Magistral closing tag", func() {
content := "Reasoning content[/THINK]actual response"
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
})
Context("with model-specific tag pairs", func() {
It("should extract Command R7B reasoning tags", func() {
content := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should extract Apertus reasoning tags", func() {
content := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should extract Seed reasoning tags", func() {
content := "Before <seed:think>reasoning here</seed:think> After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should extract Magistral reasoning tags", func() {
content := "Before [THINK]reasoning here[/THINK] After"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning here"))
Expect(cleaned).To(Equal("Before After"))
})
It("should handle unclosed Command R7B tag", func() {
content := "Before <|START_THINKING|>reasoning still streaming"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning still streaming"))
Expect(cleaned).To(Equal("Before "))
})
It("should handle unclosed Apertus tag", func() {
content := "Before <|inner_prefix|>reasoning still streaming"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning still streaming"))
Expect(cleaned).To(Equal("Before "))
})
It("should handle unclosed Seed tag", func() {
content := "Before <seed:think>reasoning still streaming"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning still streaming"))
Expect(cleaned).To(Equal("Before "))
})
It("should handle unclosed Magistral tag", func() {
content := "Before [THINK]reasoning still streaming"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("reasoning still streaming"))
Expect(cleaned).To(Equal("Before "))
})
It("should handle closing-only Command R7B tag", func() {
content := "Reasoning content<|END_THINKING|>actual response"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle closing-only Apertus tag", func() {
content := "Reasoning content<|inner_suffix|>actual response"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle closing-only Seed tag", func() {
content := "Reasoning content</seed:think>actual response"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
It("should handle closing-only Magistral tag", func() {
content := "Reasoning content[/THINK]actual response"
reasoning, cleaned := Extract(content)
Expect(reasoning).To(Equal("Reasoning content"))
Expect(cleaned).To(Equal("actual response"))
})
})
})

View File

@@ -1259,6 +1259,116 @@ const docTemplate = `{
}
}
},
"/v1/responses": {
"post": {
"summary": "Create a response using the Open Responses API",
"parameters": [
{
"description": "Request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.OpenResponsesRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
}
}
}
},
"/v1/responses/{id}": {
"get": {
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
"summary": "Get a response by ID",
"parameters": [
{
"type": "string",
"description": "Response ID",
"name": "id",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Set to 'true' to resume streaming",
"name": "stream",
"in": "query"
},
{
"type": "integer",
"description": "Sequence number to resume from (for streaming)",
"name": "starting_after",
"in": "query"
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"404": {
"description": "Not Found",
"schema": {
"type": "object",
"additionalProperties": true
}
}
}
}
},
"/v1/responses/{id}/cancel": {
"post": {
"description": "Cancel a background response if it's still in progress",
"summary": "Cancel a response",
"parameters": [
{
"type": "string",
"description": "Response ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"404": {
"description": "Not Found",
"schema": {
"type": "object",
"additionalProperties": true
}
}
}
}
},
"/v1/sound-generation": {
"post": {
"summary": "Generates audio from the input text.",
@@ -2507,6 +2617,322 @@ const docTemplate = `{
}
}
},
"schema.ORError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
},
"type": {
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
"type": "string"
}
}
},
"schema.ORFunctionTool": {
"type": "object",
"properties": {
"description": {
"type": "string"
},
"name": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": true
},
"strict": {
"description": "Always include in response",
"type": "boolean"
},
"type": {
"description": "always \"function\"",
"type": "string"
}
}
},
"schema.ORIncompleteDetails": {
"type": "object",
"properties": {
"reason": {
"type": "string"
}
}
},
"schema.ORInputTokensDetails": {
"type": "object",
"properties": {
"cached_tokens": {
"description": "Always include, even if 0",
"type": "integer"
}
}
},
"schema.ORItemField": {
"type": "object",
"properties": {
"arguments": {
"type": "string"
},
"call_id": {
"description": "Function call fields",
"type": "string"
},
"content": {
"description": "string or []ORContentPart for messages"
},
"id": {
"description": "Present for all output items",
"type": "string"
},
"name": {
"type": "string"
},
"output": {
"description": "Function call output fields"
},
"role": {
"description": "Message fields",
"type": "string"
},
"status": {
"description": "in_progress|completed|incomplete",
"type": "string"
},
"type": {
"description": "message|function_call|function_call_output|reasoning|item_reference",
"type": "string"
}
}
},
"schema.OROutputTokensDetails": {
"type": "object",
"properties": {
"reasoning_tokens": {
"description": "Always include, even if 0",
"type": "integer"
}
}
},
"schema.ORReasoning": {
"type": "object",
"properties": {
"effort": {
"type": "string"
},
"summary": {
"type": "string"
}
}
},
"schema.ORReasoningParam": {
"type": "object",
"properties": {
"effort": {
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
"type": "string"
},
"summary": {
"description": "\"auto\"|\"concise\"|\"detailed\"",
"type": "string"
}
}
},
"schema.ORResponseResource": {
"type": "object",
"properties": {
"background": {
"type": "boolean"
},
"completed_at": {
"description": "Required: present as number or null",
"type": "integer"
},
"created_at": {
"type": "integer"
},
"error": {
"description": "Always present, null if no error",
"allOf": [
{
"$ref": "#/definitions/schema.ORError"
}
]
},
"frequency_penalty": {
"type": "number"
},
"id": {
"type": "string"
},
"incomplete_details": {
"description": "Always present, null if complete",
"allOf": [
{
"$ref": "#/definitions/schema.ORIncompleteDetails"
}
]
},
"instructions": {
"type": "string"
},
"max_output_tokens": {
"type": "integer"
},
"max_tool_calls": {
"description": "nullable",
"type": "integer"
},
"metadata": {
"description": "Metadata and operational flags",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"object": {
"description": "always \"response\"",
"type": "string"
},
"output": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORItemField"
}
},
"parallel_tool_calls": {
"type": "boolean"
},
"presence_penalty": {
"type": "number"
},
"previous_response_id": {
"type": "string"
},
"prompt_cache_key": {
"description": "nullable",
"type": "string"
},
"reasoning": {
"description": "nullable",
"allOf": [
{
"$ref": "#/definitions/schema.ORReasoning"
}
]
},
"safety_identifier": {
"description": "Safety and caching",
"type": "string"
},
"service_tier": {
"type": "string"
},
"status": {
"description": "in_progress|completed|failed|incomplete",
"type": "string"
},
"store": {
"type": "boolean"
},
"temperature": {
"description": "Sampling parameters (always required)",
"type": "number"
},
"text": {
"description": "Text format configuration",
"allOf": [
{
"$ref": "#/definitions/schema.ORTextConfig"
}
]
},
"tool_choice": {},
"tools": {
"description": "Tool-related fields",
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORFunctionTool"
}
},
"top_logprobs": {
"description": "Default to 0",
"type": "integer"
},
"top_p": {
"type": "number"
},
"truncation": {
"description": "Truncation and reasoning",
"type": "string"
},
"usage": {
"description": "Usage statistics",
"allOf": [
{
"$ref": "#/definitions/schema.ORUsage"
}
]
}
}
},
"schema.ORTextConfig": {
"type": "object",
"properties": {
"format": {
"$ref": "#/definitions/schema.ORTextFormat"
}
}
},
"schema.ORTextFormat": {
"type": "object",
"properties": {
"type": {
"description": "\"text\" or \"json_schema\"",
"type": "string"
}
}
},
"schema.ORUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"description": "Always present",
"allOf": [
{
"$ref": "#/definitions/schema.ORInputTokensDetails"
}
]
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"description": "Always present",
"allOf": [
{
"$ref": "#/definitions/schema.OROutputTokensDetails"
}
]
},
"total_tokens": {
"type": "integer"
}
}
},
"schema.OpenAIModel": {
"type": "object",
"properties": {
@@ -2781,6 +3207,114 @@ const docTemplate = `{
}
}
},
"schema.OpenResponsesRequest": {
"type": "object",
"properties": {
"allowed_tools": {
"description": "Restrict which tools can be invoked",
"type": "array",
"items": {
"type": "string"
}
},
"background": {
"description": "Run request in background",
"type": "boolean"
},
"frequency_penalty": {
"description": "Frequency penalty (-2.0 to 2.0)",
"type": "number"
},
"include": {
"description": "What to include in response",
"type": "array",
"items": {
"type": "string"
}
},
"input": {
"description": "string or []ORItemParam"
},
"instructions": {
"type": "string"
},
"logit_bias": {
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
"type": "object",
"additionalProperties": {
"type": "number",
"format": "float64"
}
},
"max_output_tokens": {
"type": "integer"
},
"max_tool_calls": {
"description": "Maximum number of tool calls",
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"parallel_tool_calls": {
"description": "Allow parallel tool calls",
"type": "boolean"
},
"presence_penalty": {
"description": "Presence penalty (-2.0 to 2.0)",
"type": "number"
},
"previous_response_id": {
"type": "string"
},
"reasoning": {
"$ref": "#/definitions/schema.ORReasoningParam"
},
"service_tier": {
"description": "\"auto\"|\"default\"|priority hint",
"type": "string"
},
"store": {
"description": "Whether to store the response",
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"temperature": {
"type": "number"
},
"text_format": {
"description": "Additional parameters from spec"
},
"tool_choice": {
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORFunctionTool"
}
},
"top_logprobs": {
"description": "Number of top logprobs to return",
"type": "integer"
},
"top_p": {
"type": "number"
},
"truncation": {
"description": "\"auto\"|\"disabled\"",
"type": "string"
}
}
},
"schema.P2PNodesResponse": {
"type": "object",
"properties": {

View File

@@ -1252,6 +1252,116 @@
}
}
},
"/v1/responses": {
"post": {
"summary": "Create a response using the Open Responses API",
"parameters": [
{
"description": "Request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.OpenResponsesRequest"
}
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
}
}
}
},
"/v1/responses/{id}": {
"get": {
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
"summary": "Get a response by ID",
"parameters": [
{
"type": "string",
"description": "Response ID",
"name": "id",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Set to 'true' to resume streaming",
"name": "stream",
"in": "query"
},
{
"type": "integer",
"description": "Sequence number to resume from (for streaming)",
"name": "starting_after",
"in": "query"
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"404": {
"description": "Not Found",
"schema": {
"type": "object",
"additionalProperties": true
}
}
}
}
},
"/v1/responses/{id}/cancel": {
"post": {
"description": "Cancel a background response if it's still in progress",
"summary": "Cancel a response",
"parameters": [
{
"type": "string",
"description": "Response ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.ORResponseResource"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"404": {
"description": "Not Found",
"schema": {
"type": "object",
"additionalProperties": true
}
}
}
}
},
"/v1/sound-generation": {
"post": {
"summary": "Generates audio from the input text.",
@@ -2500,6 +2610,322 @@
}
}
},
"schema.ORError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
},
"type": {
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
"type": "string"
}
}
},
"schema.ORFunctionTool": {
"type": "object",
"properties": {
"description": {
"type": "string"
},
"name": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": true
},
"strict": {
"description": "Always include in response",
"type": "boolean"
},
"type": {
"description": "always \"function\"",
"type": "string"
}
}
},
"schema.ORIncompleteDetails": {
"type": "object",
"properties": {
"reason": {
"type": "string"
}
}
},
"schema.ORInputTokensDetails": {
"type": "object",
"properties": {
"cached_tokens": {
"description": "Always include, even if 0",
"type": "integer"
}
}
},
"schema.ORItemField": {
"type": "object",
"properties": {
"arguments": {
"type": "string"
},
"call_id": {
"description": "Function call fields",
"type": "string"
},
"content": {
"description": "string or []ORContentPart for messages"
},
"id": {
"description": "Present for all output items",
"type": "string"
},
"name": {
"type": "string"
},
"output": {
"description": "Function call output fields"
},
"role": {
"description": "Message fields",
"type": "string"
},
"status": {
"description": "in_progress|completed|incomplete",
"type": "string"
},
"type": {
"description": "message|function_call|function_call_output|reasoning|item_reference",
"type": "string"
}
}
},
"schema.OROutputTokensDetails": {
"type": "object",
"properties": {
"reasoning_tokens": {
"description": "Always include, even if 0",
"type": "integer"
}
}
},
"schema.ORReasoning": {
"type": "object",
"properties": {
"effort": {
"type": "string"
},
"summary": {
"type": "string"
}
}
},
"schema.ORReasoningParam": {
"type": "object",
"properties": {
"effort": {
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
"type": "string"
},
"summary": {
"description": "\"auto\"|\"concise\"|\"detailed\"",
"type": "string"
}
}
},
"schema.ORResponseResource": {
"type": "object",
"properties": {
"background": {
"type": "boolean"
},
"completed_at": {
"description": "Required: present as number or null",
"type": "integer"
},
"created_at": {
"type": "integer"
},
"error": {
"description": "Always present, null if no error",
"allOf": [
{
"$ref": "#/definitions/schema.ORError"
}
]
},
"frequency_penalty": {
"type": "number"
},
"id": {
"type": "string"
},
"incomplete_details": {
"description": "Always present, null if complete",
"allOf": [
{
"$ref": "#/definitions/schema.ORIncompleteDetails"
}
]
},
"instructions": {
"type": "string"
},
"max_output_tokens": {
"type": "integer"
},
"max_tool_calls": {
"description": "nullable",
"type": "integer"
},
"metadata": {
"description": "Metadata and operational flags",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"object": {
"description": "always \"response\"",
"type": "string"
},
"output": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORItemField"
}
},
"parallel_tool_calls": {
"type": "boolean"
},
"presence_penalty": {
"type": "number"
},
"previous_response_id": {
"type": "string"
},
"prompt_cache_key": {
"description": "nullable",
"type": "string"
},
"reasoning": {
"description": "nullable",
"allOf": [
{
"$ref": "#/definitions/schema.ORReasoning"
}
]
},
"safety_identifier": {
"description": "Safety and caching",
"type": "string"
},
"service_tier": {
"type": "string"
},
"status": {
"description": "in_progress|completed|failed|incomplete",
"type": "string"
},
"store": {
"type": "boolean"
},
"temperature": {
"description": "Sampling parameters (always required)",
"type": "number"
},
"text": {
"description": "Text format configuration",
"allOf": [
{
"$ref": "#/definitions/schema.ORTextConfig"
}
]
},
"tool_choice": {},
"tools": {
"description": "Tool-related fields",
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORFunctionTool"
}
},
"top_logprobs": {
"description": "Default to 0",
"type": "integer"
},
"top_p": {
"type": "number"
},
"truncation": {
"description": "Truncation and reasoning",
"type": "string"
},
"usage": {
"description": "Usage statistics",
"allOf": [
{
"$ref": "#/definitions/schema.ORUsage"
}
]
}
}
},
"schema.ORTextConfig": {
"type": "object",
"properties": {
"format": {
"$ref": "#/definitions/schema.ORTextFormat"
}
}
},
"schema.ORTextFormat": {
"type": "object",
"properties": {
"type": {
"description": "\"text\" or \"json_schema\"",
"type": "string"
}
}
},
"schema.ORUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"description": "Always present",
"allOf": [
{
"$ref": "#/definitions/schema.ORInputTokensDetails"
}
]
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"description": "Always present",
"allOf": [
{
"$ref": "#/definitions/schema.OROutputTokensDetails"
}
]
},
"total_tokens": {
"type": "integer"
}
}
},
"schema.OpenAIModel": {
"type": "object",
"properties": {
@@ -2774,6 +3200,114 @@
}
}
},
"schema.OpenResponsesRequest": {
"type": "object",
"properties": {
"allowed_tools": {
"description": "Restrict which tools can be invoked",
"type": "array",
"items": {
"type": "string"
}
},
"background": {
"description": "Run request in background",
"type": "boolean"
},
"frequency_penalty": {
"description": "Frequency penalty (-2.0 to 2.0)",
"type": "number"
},
"include": {
"description": "What to include in response",
"type": "array",
"items": {
"type": "string"
}
},
"input": {
"description": "string or []ORItemParam"
},
"instructions": {
"type": "string"
},
"logit_bias": {
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
"type": "object",
"additionalProperties": {
"type": "number",
"format": "float64"
}
},
"max_output_tokens": {
"type": "integer"
},
"max_tool_calls": {
"description": "Maximum number of tool calls",
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"parallel_tool_calls": {
"description": "Allow parallel tool calls",
"type": "boolean"
},
"presence_penalty": {
"description": "Presence penalty (-2.0 to 2.0)",
"type": "number"
},
"previous_response_id": {
"type": "string"
},
"reasoning": {
"$ref": "#/definitions/schema.ORReasoningParam"
},
"service_tier": {
"description": "\"auto\"|\"default\"|priority hint",
"type": "string"
},
"store": {
"description": "Whether to store the response",
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"temperature": {
"type": "number"
},
"text_format": {
"description": "Additional parameters from spec"
},
"tool_choice": {
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.ORFunctionTool"
}
},
"top_logprobs": {
"description": "Number of top logprobs to return",
"type": "integer"
},
"top_p": {
"type": "number"
},
"truncation": {
"description": "\"auto\"|\"disabled\"",
"type": "string"
}
}
},
"schema.P2PNodesResponse": {
"type": "object",
"properties": {

View File

@@ -742,6 +742,212 @@ definitions:
tunnelAddress:
type: string
type: object
schema.ORError:
properties:
code:
type: string
message:
type: string
param:
type: string
type:
description: invalid_request|not_found|server_error|model_error|too_many_requests
type: string
type: object
schema.ORFunctionTool:
properties:
description:
type: string
name:
type: string
parameters:
additionalProperties: true
type: object
strict:
description: Always include in response
type: boolean
type:
description: always "function"
type: string
type: object
schema.ORIncompleteDetails:
properties:
reason:
type: string
type: object
schema.ORInputTokensDetails:
properties:
cached_tokens:
description: Always include, even if 0
type: integer
type: object
schema.ORItemField:
properties:
arguments:
type: string
call_id:
description: Function call fields
type: string
content:
description: string or []ORContentPart for messages
id:
description: Present for all output items
type: string
name:
type: string
output:
description: Function call output fields
role:
description: Message fields
type: string
status:
description: in_progress|completed|incomplete
type: string
type:
description: message|function_call|function_call_output|reasoning|item_reference
type: string
type: object
schema.OROutputTokensDetails:
properties:
reasoning_tokens:
description: Always include, even if 0
type: integer
type: object
schema.ORReasoning:
properties:
effort:
type: string
summary:
type: string
type: object
schema.ORReasoningParam:
properties:
effort:
description: '"none"|"low"|"medium"|"high"|"xhigh"'
type: string
summary:
description: '"auto"|"concise"|"detailed"'
type: string
type: object
schema.ORResponseResource:
properties:
background:
type: boolean
completed_at:
description: 'Required: present as number or null'
type: integer
created_at:
type: integer
error:
allOf:
- $ref: '#/definitions/schema.ORError'
description: Always present, null if no error
frequency_penalty:
type: number
id:
type: string
incomplete_details:
allOf:
- $ref: '#/definitions/schema.ORIncompleteDetails'
description: Always present, null if complete
instructions:
type: string
max_output_tokens:
type: integer
max_tool_calls:
description: nullable
type: integer
metadata:
additionalProperties:
type: string
description: Metadata and operational flags
type: object
model:
type: string
object:
description: always "response"
type: string
output:
items:
$ref: '#/definitions/schema.ORItemField'
type: array
parallel_tool_calls:
type: boolean
presence_penalty:
type: number
previous_response_id:
type: string
prompt_cache_key:
description: nullable
type: string
reasoning:
allOf:
- $ref: '#/definitions/schema.ORReasoning'
description: nullable
safety_identifier:
description: Safety and caching
type: string
service_tier:
type: string
status:
description: in_progress|completed|failed|incomplete
type: string
store:
type: boolean
temperature:
description: Sampling parameters (always required)
type: number
text:
allOf:
- $ref: '#/definitions/schema.ORTextConfig'
description: Text format configuration
tool_choice: {}
tools:
description: Tool-related fields
items:
$ref: '#/definitions/schema.ORFunctionTool'
type: array
top_logprobs:
description: Default to 0
type: integer
top_p:
type: number
truncation:
description: Truncation and reasoning
type: string
usage:
allOf:
- $ref: '#/definitions/schema.ORUsage'
description: Usage statistics
type: object
schema.ORTextConfig:
properties:
format:
$ref: '#/definitions/schema.ORTextFormat'
type: object
schema.ORTextFormat:
properties:
type:
description: '"text" or "json_schema"'
type: string
type: object
schema.ORUsage:
properties:
input_tokens:
type: integer
input_tokens_details:
allOf:
- $ref: '#/definitions/schema.ORInputTokensDetails'
description: Always present
output_tokens:
type: integer
output_tokens_details:
allOf:
- $ref: '#/definitions/schema.OROutputTokensDetails'
description: Always present
total_tokens:
type: integer
type: object
schema.OpenAIModel:
properties:
id:
@@ -936,6 +1142,82 @@ definitions:
total_tokens:
type: integer
type: object
schema.OpenResponsesRequest:
properties:
allowed_tools:
description: Restrict which tools can be invoked
items:
type: string
type: array
background:
description: Run request in background
type: boolean
frequency_penalty:
description: Frequency penalty (-2.0 to 2.0)
type: number
include:
description: What to include in response
items:
type: string
type: array
input:
description: string or []ORItemParam
instructions:
type: string
logit_bias:
additionalProperties:
format: float64
type: number
description: OpenAI-compatible extensions (not in Open Responses spec)
type: object
max_output_tokens:
type: integer
max_tool_calls:
description: Maximum number of tool calls
type: integer
metadata:
additionalProperties:
type: string
type: object
model:
type: string
parallel_tool_calls:
description: Allow parallel tool calls
type: boolean
presence_penalty:
description: Presence penalty (-2.0 to 2.0)
type: number
previous_response_id:
type: string
reasoning:
$ref: '#/definitions/schema.ORReasoningParam'
service_tier:
description: '"auto"|"default"|priority hint'
type: string
store:
description: Whether to store the response
type: boolean
stream:
type: boolean
temperature:
type: number
text_format:
description: Additional parameters from spec
tool_choice:
description: '"auto"|"required"|"none"|{type:"function",name:"..."}'
tools:
items:
$ref: '#/definitions/schema.ORFunctionTool'
type: array
top_logprobs:
description: Number of top logprobs to return
type: integer
top_p:
type: number
truncation:
description: '"auto"|"disabled"'
type: string
type: object
schema.P2PNodesResponse:
properties:
federated_nodes:
@@ -1962,6 +2244,80 @@ paths:
schema:
$ref: '#/definitions/schema.JINARerankResponse'
summary: Reranks a list of phrases by relevance to a given text query.
/v1/responses:
post:
parameters:
- description: Request body
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenResponsesRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
summary: Create a response using the Open Responses API
/v1/responses/{id}:
get:
description: Retrieve a response by ID. Can be used for polling background responses
or resuming streaming responses.
parameters:
- description: Response ID
in: path
name: id
required: true
type: string
- description: Set to 'true' to resume streaming
in: query
name: stream
type: string
- description: Sequence number to resume from (for streaming)
in: query
name: starting_after
type: integer
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
"400":
description: Bad Request
schema:
additionalProperties: true
type: object
"404":
description: Not Found
schema:
additionalProperties: true
type: object
summary: Get a response by ID
/v1/responses/{id}/cancel:
post:
description: Cancel a background response if it's still in progress
parameters:
- description: Response ID
in: path
name: id
required: true
type: string
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
"400":
description: Bad Request
schema:
additionalProperties: true
type: object
"404":
description: Not Found
schema:
additionalProperties: true
type: object
summary: Cancel a response
/v1/sound-generation:
post:
parameters: