mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-05 04:02:45 -05:00
Compare commits
12 Commits
v3.10.0
...
fix/reason
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61a6e95f7d | ||
|
|
a352125726 | ||
|
|
187e474daf | ||
|
|
4bf2f8bbd8 | ||
|
|
d3525b7509 | ||
|
|
c8aa821e0e | ||
|
|
b3191927ae | ||
|
|
54c5a2d9ea | ||
|
|
0279591fec | ||
|
|
8845186955 | ||
|
|
ab8ed24358 | ||
|
|
a021df5a88 |
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=2fbde785bc106ae1c4102b0e82b9b41d9c466579
|
||||
LLAMA_VERSION?=959ecf7f234dc0bc0cd6829b25cb0ee1481aa78a
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=9565c7f6bd5fcff124c589147b2621244f2c4aa1
|
||||
STABLEDIFFUSION_GGML_VERSION?=a48b4a3ade9972faf0adcad47e51c6fc03f0e46d
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
"github.com/mudler/cogito"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -51,6 +52,7 @@ type ModelConfig struct {
|
||||
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
|
||||
|
||||
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
|
||||
ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
|
||||
|
||||
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||
|
||||
"github.com/mudler/LocalAI/core/templates"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
lastEmittedReasoning := ""
|
||||
lastEmittedCleanedContent := ""
|
||||
|
||||
// Configure reasoning extraction options
|
||||
// Auto-detect if prompt ends with thinking tag
|
||||
// or use explicit config setting
|
||||
thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
|
||||
|
||||
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
accumulatedContent += s
|
||||
// Extract reasoning from accumulated content
|
||||
currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
|
||||
opts := []reasoning.Option{}
|
||||
if thinkingForcedOpen {
|
||||
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||
}
|
||||
currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
|
||||
|
||||
// Calculate new reasoning delta (what we haven't emitted yet)
|
||||
var reasoningDelta *string
|
||||
@@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
return err
|
||||
}
|
||||
// Extract reasoning before processing tool calls
|
||||
reasoning, cleanedResult := functions.ExtractReasoning(result)
|
||||
// Auto-detect if prompt ends with thinking tag or use explicit config
|
||||
toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
|
||||
opts := []reasoning.Option{}
|
||||
if toolsThinkingForcedOpen {
|
||||
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||
}
|
||||
extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
|
||||
result = cleanedResult
|
||||
|
||||
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
||||
@@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
}
|
||||
|
||||
var deltaReasoning *string
|
||||
if reasoning != "" {
|
||||
deltaReasoning = &reasoning
|
||||
if extractedReasoning != "" {
|
||||
deltaReasoning = &extractedReasoning
|
||||
}
|
||||
delta := &schema.Message{Content: &result}
|
||||
if deltaReasoning != nil {
|
||||
@@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
// no streaming mode
|
||||
default:
|
||||
|
||||
// Auto-detect if prompt ends with thinking tag for non-streaming mode
|
||||
nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
|
||||
|
||||
tokenCallback := func(s string, c *[]schema.Choice) {
|
||||
// Extract reasoning from the response
|
||||
reasoning, cleanedS := functions.ExtractReasoning(s)
|
||||
s = cleanedS
|
||||
var extractedReasoning string
|
||||
opts := []reasoning.Option{}
|
||||
if nonStreamThinkingForcedOpen {
|
||||
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||
}
|
||||
extractedReasoning, s = reasoning.Extract(s, opts...)
|
||||
|
||||
if !shouldUseFn {
|
||||
// no function is called, just reply and use stop as finish reason
|
||||
stopReason := FinishReasonStop
|
||||
message := &schema.Message{Role: "assistant", Content: &s}
|
||||
if reasoning != "" {
|
||||
message.Reasoning = &reasoning
|
||||
if extractedReasoning != "" {
|
||||
message.Reasoning = &extractedReasoning
|
||||
}
|
||||
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
|
||||
return
|
||||
@@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
|
||||
stopReason := FinishReasonStop
|
||||
message := &schema.Message{Role: "assistant", Content: &result}
|
||||
if reasoning != "" {
|
||||
message.Reasoning = &reasoning
|
||||
if extractedReasoning != "" {
|
||||
message.Reasoning = &extractedReasoning
|
||||
}
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: &stopReason,
|
||||
@@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
Role: "assistant",
|
||||
},
|
||||
}
|
||||
if reasoning != "" {
|
||||
toolChoice.Message.Reasoning = &reasoning
|
||||
if extractedReasoning != "" {
|
||||
toolChoice.Message.Reasoning = &extractedReasoning
|
||||
}
|
||||
|
||||
for _, ss := range results {
|
||||
@@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
||||
"arguments": args,
|
||||
},
|
||||
}
|
||||
if reasoning != "" {
|
||||
message.Reasoning = &reasoning
|
||||
if extractedReasoning != "" {
|
||||
message.Reasoning = &extractedReasoning
|
||||
}
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: &functionCallReason,
|
||||
|
||||
@@ -72,6 +72,359 @@ You can list all the models available with:
|
||||
curl http://localhost:8080/v1/models
|
||||
```
|
||||
|
||||
### Anthropic Messages API
|
||||
|
||||
LocalAI supports the Anthropic Messages API, which is compatible with Claude clients. This endpoint provides a structured way to send messages and receive responses, with support for tools, streaming, and multimodal content.
|
||||
|
||||
**Endpoint:** `POST /v1/messages` or `POST /messages`
|
||||
|
||||
**Reference:** https://docs.anthropic.com/claude/reference/messages_post
|
||||
|
||||
#### Basic Usage
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"max_tokens": 1024,
|
||||
"messages": [
|
||||
{"role": "user", "content": "Say this is a test!"}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Request Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `model` | string | Yes | The model identifier |
|
||||
| `messages` | array | Yes | Array of message objects with `role` and `content` |
|
||||
| `max_tokens` | integer | Yes | Maximum number of tokens to generate (must be > 0) |
|
||||
| `system` | string | No | System message to set the assistant's behavior |
|
||||
| `temperature` | float | No | Sampling temperature (0.0 to 1.0) |
|
||||
| `top_p` | float | No | Nucleus sampling parameter |
|
||||
| `top_k` | integer | No | Top-k sampling parameter |
|
||||
| `stop_sequences` | array | No | Array of strings that will stop generation |
|
||||
| `stream` | boolean | No | Enable streaming responses |
|
||||
| `tools` | array | No | Array of tool definitions for function calling |
|
||||
| `tool_choice` | string/object | No | Tool choice strategy: "auto", "any", "none", or specific tool |
|
||||
| `metadata` | object | No | Custom metadata to attach to the request |
|
||||
|
||||
#### Message Format
|
||||
|
||||
Messages can contain text or structured content blocks:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"max_tokens": 1024,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What is in this image?"
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": "image/jpeg",
|
||||
"data": "base64_encoded_image_data"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Tool Calling
|
||||
|
||||
The Anthropic API supports function calling through tools:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"max_tokens": 1024,
|
||||
"tools": [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state"
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_choice": "auto",
|
||||
"messages": [
|
||||
{"role": "user", "content": "What is the weather in San Francisco?"}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Streaming
|
||||
|
||||
Enable streaming responses by setting `stream: true`:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"max_tokens": 1024,
|
||||
"stream": true,
|
||||
"messages": [
|
||||
{"role": "user", "content": "Tell me a story"}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Streaming responses use Server-Sent Events (SSE) format with event types: `message_start`, `content_block_start`, `content_block_delta`, `content_block_stop`, `message_delta`, and `message_stop`.
|
||||
|
||||
#### Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "msg_abc123",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "This is a test!"
|
||||
}
|
||||
],
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"stop_reason": "end_turn",
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Open Responses API
|
||||
|
||||
LocalAI supports the Open Responses API specification, which provides a standardized interface for AI model interactions with support for background processing, streaming, tool calling, and advanced features like reasoning.
|
||||
|
||||
**Endpoint:** `POST /v1/responses` or `POST /responses`
|
||||
|
||||
**Reference:** https://www.openresponses.org/specification
|
||||
|
||||
#### Basic Usage
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"input": "Say this is a test!",
|
||||
"max_output_tokens": 1024
|
||||
}'
|
||||
```
|
||||
|
||||
#### Request Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `model` | string | Yes | The model identifier |
|
||||
| `input` | string/array | Yes | Input text or array of input items |
|
||||
| `max_output_tokens` | integer | No | Maximum number of tokens to generate |
|
||||
| `temperature` | float | No | Sampling temperature |
|
||||
| `top_p` | float | No | Nucleus sampling parameter |
|
||||
| `instructions` | string | No | System instructions |
|
||||
| `tools` | array | No | Array of tool definitions |
|
||||
| `tool_choice` | string/object | No | Tool choice: "auto", "required", "none", or specific tool |
|
||||
| `stream` | boolean | No | Enable streaming responses |
|
||||
| `background` | boolean | No | Run request in background (returns immediately) |
|
||||
| `store` | boolean | No | Whether to store the response |
|
||||
| `reasoning` | object | No | Reasoning configuration with `effort` and `summary` |
|
||||
| `parallel_tool_calls` | boolean | No | Allow parallel tool calls |
|
||||
| `max_tool_calls` | integer | No | Maximum number of tool calls |
|
||||
| `presence_penalty` | float | No | Presence penalty (-2.0 to 2.0) |
|
||||
| `frequency_penalty` | float | No | Frequency penalty (-2.0 to 2.0) |
|
||||
| `top_logprobs` | integer | No | Number of top logprobs to return |
|
||||
| `truncation` | string | No | Truncation mode: "auto" or "disabled" |
|
||||
| `text_format` | object | No | Text format configuration |
|
||||
| `metadata` | object | No | Custom metadata |
|
||||
|
||||
#### Input Format
|
||||
|
||||
Input can be a simple string or an array of structured items:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": "What is the weather?"
|
||||
}
|
||||
],
|
||||
"max_output_tokens": 1024
|
||||
}'
|
||||
```
|
||||
|
||||
#### Background Processing
|
||||
|
||||
Run requests in the background for long-running tasks:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"input": "Generate a long story",
|
||||
"max_output_tokens": 4096,
|
||||
"background": true
|
||||
}'
|
||||
```
|
||||
|
||||
The response will include a response ID that can be used to poll for completion:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "resp_abc123",
|
||||
"object": "response",
|
||||
"status": "in_progress",
|
||||
"created_at": 1234567890
|
||||
}
|
||||
```
|
||||
|
||||
#### Retrieving Background Responses
|
||||
|
||||
Use the GET endpoint to retrieve background responses:
|
||||
|
||||
```bash
|
||||
# Get response by ID
|
||||
curl http://localhost:8080/v1/responses/resp_abc123
|
||||
|
||||
# Resume streaming with query parameters
|
||||
curl "http://localhost:8080/v1/responses/resp_abc123?stream=true&starting_after=10"
|
||||
```
|
||||
|
||||
#### Canceling Background Responses
|
||||
|
||||
Cancel a background response that's still in progress:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8080/v1/responses/resp_abc123/cancel
|
||||
```
|
||||
|
||||
#### Tool Calling
|
||||
|
||||
Open Responses API supports function calling with tools:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"input": "What is the weather in San Francisco?",
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state"
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_choice": "auto",
|
||||
"max_output_tokens": 1024
|
||||
}'
|
||||
```
|
||||
|
||||
#### Reasoning Configuration
|
||||
|
||||
Configure reasoning effort and summary style:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"input": "Solve this complex problem step by step",
|
||||
"reasoning": {
|
||||
"effort": "high",
|
||||
"summary": "detailed"
|
||||
},
|
||||
"max_output_tokens": 2048
|
||||
}'
|
||||
```
|
||||
|
||||
#### Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "resp_abc123",
|
||||
"object": "response",
|
||||
"created_at": 1234567890,
|
||||
"completed_at": 1234567895,
|
||||
"status": "completed",
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"output": [
|
||||
{
|
||||
"type": "message",
|
||||
"id": "msg_001",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "This is a test!",
|
||||
"annotations": [],
|
||||
"logprobs": []
|
||||
}
|
||||
],
|
||||
"status": "completed"
|
||||
}
|
||||
],
|
||||
"error": null,
|
||||
"incomplete_details": null,
|
||||
"temperature": 0.7,
|
||||
"top_p": 1.0,
|
||||
"presence_penalty": 0.0,
|
||||
"frequency_penalty": 0.0,
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 5,
|
||||
"total_tokens": 15,
|
||||
"input_tokens_details": {
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Backends
|
||||
|
||||
### RWKV
|
||||
|
||||
@@ -112,6 +112,66 @@ curl http://localhost:8080/v1/chat/completions \
|
||||
|
||||
</details>
|
||||
|
||||
### Anthropic Messages API
|
||||
|
||||
LocalAI supports the Anthropic Messages API for Claude-compatible models. [Anthropic documentation](https://docs.anthropic.com/claude/reference/messages_post).
|
||||
|
||||
<details>
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-d '{
|
||||
"model": "gpt-4",
|
||||
"max_tokens": 1024,
|
||||
"messages": [
|
||||
{"role": "user", "content": "How are you doing?"}
|
||||
],
|
||||
"temperature": 0.7
|
||||
}'
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Open Responses API
|
||||
|
||||
LocalAI supports the Open Responses API specification with support for background processing, streaming, and advanced features. [Open Responses documentation](https://www.openresponses.org/specification).
|
||||
|
||||
<details>
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-4",
|
||||
"input": "Say this is a test!",
|
||||
"max_output_tokens": 1024,
|
||||
"temperature": 0.7
|
||||
}'
|
||||
```
|
||||
|
||||
For background processing:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-4",
|
||||
"input": "Generate a long story",
|
||||
"max_output_tokens": 4096,
|
||||
"background": true
|
||||
}'
|
||||
```
|
||||
|
||||
Then retrieve the response:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/responses/<response_id>
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Image Generation
|
||||
|
||||
Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v3.9.0"
|
||||
"version": "v3.10.0"
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
This description emphasizes its capabilities, efficiency, and versatility for multimodal search tasks.
|
||||
overrides:
|
||||
reranking: true
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwen3-VL-Reranker-8B.Q4_K_M.gguf
|
||||
name: Qwen3-VL-Reranker-8B-GGUF
|
||||
|
||||
2
go.mod
2
go.mod
@@ -32,7 +32,7 @@ require (
|
||||
github.com/mholt/archiver/v3 v3.5.1
|
||||
github.com/microcosm-cc/bluemonday v1.0.27
|
||||
github.com/modelcontextprotocol/go-sdk v1.2.0
|
||||
github.com/mudler/cogito v0.7.2
|
||||
github.com/mudler/cogito v0.8.1
|
||||
github.com/mudler/edgevpn v0.31.1
|
||||
github.com/mudler/go-processmanager v0.1.0
|
||||
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2
|
||||
|
||||
4
go.sum
4
go.sum
@@ -507,8 +507,8 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
|
||||
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
|
||||
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mudler/cogito v0.7.2 h1:J5eHZPsxpoKcnYUfogje5u0nnzGww7ytv7nSn1DMpms=
|
||||
github.com/mudler/cogito v0.7.2/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||
github.com/mudler/cogito v0.8.1 h1:66qPJkAMrq/Vo8AC/PvXWuVxYPhi7X2DQuJIilL8+3I=
|
||||
github.com/mudler/cogito v0.8.1/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
|
||||
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
|
||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
package functions
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExtractReasoning extracts reasoning content from thinking tags and returns
// both the extracted reasoning and the cleaned content (with tags removed).
// It handles <thinking>...</thinking> and <think>...</think> tags.
// Multiple reasoning blocks are concatenated with blank lines; an opening
// tag that is never closed consumes everything up to the end of the input.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if content == "" {
		return "", content
	}

	type tagPair struct {
		open  string
		close string
	}
	pairs := []tagPair{
		{"<thinking>", "</thinking>"},
		{"<think>", "</think>"},
	}

	var thoughts []string // extracted reasoning fragments, in order of appearance
	var kept []string     // non-reasoning fragments, in order of appearance
	cursor := 0           // position up to which content has been consumed

	for {
		// Scan all tag pairs and pick the one whose opening tag occurs first.
		bestOpen := -1
		bestEnd := -1
		unclosed := false // true when the winning opening tag has no close
		var best tagPair

		for _, p := range pairs {
			at := strings.Index(content[cursor:], p.open)
			if at < 0 {
				continue
			}
			at += cursor

			closeAt := strings.Index(content[at+len(p.open):], p.close)
			if closeAt < 0 {
				// Opening tag with no closing tag: candidate that
				// swallows the remainder of the input.
				if bestOpen < 0 || at < bestOpen {
					bestOpen, bestEnd, unclosed, best = at, len(content), true, p
				}
				continue
			}
			closeAt += at + len(p.open)

			// Complete pair; bestEnd points just past the closing tag.
			if bestOpen < 0 || at < bestOpen {
				bestOpen, bestEnd, unclosed, best = at, closeAt+len(p.close), false, p
			}
		}

		if bestOpen < 0 {
			// No further tags: keep the tail verbatim and stop.
			if cursor < len(content) {
				kept = append(kept, content[cursor:])
			}
			break
		}

		// Preserve whatever preceded the opening tag.
		if bestOpen > cursor {
			kept = append(kept, content[cursor:bestOpen])
		}

		// Slice out the reasoning text between the tags. For an unclosed
		// tag the slice runs to the end of the input; for a closed tag it
		// stops just before the closing tag.
		from := bestOpen + len(best.open)
		to := bestEnd
		if !unclosed {
			to -= len(best.close)
		}
		if to > from {
			if t := strings.TrimSpace(content[from:to]); t != "" {
				thoughts = append(thoughts, t)
			}
		}

		cursor = bestEnd
	}

	return strings.Join(thoughts, "\n\n"), strings.Join(kept, "")
}
|
||||
@@ -1,261 +0,0 @@
|
||||
package functions_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
. "github.com/mudler/LocalAI/pkg/functions"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("ExtractReasoning", func() {
|
||||
Context("when content has no reasoning tags", func() {
|
||||
It("should return empty reasoning and original content", func() {
|
||||
content := "This is regular content without any tags."
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(content))
|
||||
})
|
||||
|
||||
It("should handle empty string", func() {
|
||||
content := ""
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should handle content with only whitespace", func() {
|
||||
content := " \n\t "
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(content))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <thinking> tags", func() {
|
||||
It("should extract reasoning from single thinking block", func() {
|
||||
content := "Some text <thinking>This is my reasoning</thinking> More text"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("This is my reasoning"))
|
||||
Expect(cleaned).To(Equal("Some text More text"))
|
||||
})
|
||||
|
||||
It("should extract reasoning and preserve surrounding content", func() {
|
||||
content := "Before <thinking>Reasoning here</thinking> After"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle thinking block at the start", func() {
|
||||
content := "<thinking>Start reasoning</thinking> Regular content"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Start reasoning"))
|
||||
Expect(cleaned).To(Equal(" Regular content"))
|
||||
})
|
||||
|
||||
It("should handle thinking block at the end", func() {
|
||||
content := "Regular content <thinking>End reasoning</thinking>"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("End reasoning"))
|
||||
Expect(cleaned).To(Equal("Regular content "))
|
||||
})
|
||||
|
||||
It("should handle only thinking block", func() {
|
||||
content := "<thinking>Only reasoning</thinking>"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Only reasoning"))
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should trim whitespace from reasoning content", func() {
|
||||
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with spaces"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <think> tags", func() {
|
||||
It("should extract reasoning from redacted_reasoning block", func() {
|
||||
content := "Text <think>Redacted reasoning</think> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Redacted reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle redacted_reasoning with multiline content", func() {
|
||||
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle redacted_reasoning with complex content", func() {
|
||||
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
|
||||
Expect(cleaned).To(Equal("Start End"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has multiple reasoning blocks", func() {
|
||||
It("should concatenate multiple thinking blocks with newlines", func() {
|
||||
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("First\n\nSecond"))
|
||||
Expect(cleaned).To(Equal("Text Middle End"))
|
||||
})
|
||||
|
||||
It("should handle multiple different tag types", func() {
|
||||
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(ContainSubstring("One"))
|
||||
Expect(reasoning).To(ContainSubstring("Two"))
|
||||
Expect(reasoning).To(ContainSubstring("Three"))
|
||||
Expect(cleaned).To(Equal("A B C D"))
|
||||
})
|
||||
|
||||
It("should handle nested tags correctly (extracts first match)", func() {
|
||||
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
// Should extract the outer thinking block
|
||||
Expect(reasoning).To(ContainSubstring("Outer"))
|
||||
Expect(reasoning).To(ContainSubstring("Inner"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has unclosed reasoning tags", func() {
|
||||
It("should extract unclosed thinking block", func() {
|
||||
content := "Text <thinking>Unclosed reasoning"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Unclosed reasoning"))
|
||||
Expect(cleaned).To(Equal("Text "))
|
||||
})
|
||||
|
||||
It("should extract unclosed think block", func() {
|
||||
content := "Before <think>Incomplete"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Incomplete"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should extract unclosed redacted_reasoning block", func() {
|
||||
content := "Start <think>Partial reasoning content"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Partial reasoning content"))
|
||||
Expect(cleaned).To(Equal("Start "))
|
||||
})
|
||||
|
||||
It("should handle unclosed tag at the end", func() {
|
||||
content := "Regular content <thinking>Unclosed at end"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Unclosed at end"))
|
||||
Expect(cleaned).To(Equal("Regular content "))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has empty reasoning blocks", func() {
|
||||
It("should ignore empty thinking block", func() {
|
||||
content := "Text <thinking></thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should ignore thinking block with only whitespace", func() {
|
||||
content := "Text <thinking> \n\t </thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has reasoning tags with special characters", func() {
|
||||
It("should handle reasoning with newlines", func() {
|
||||
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with code blocks", func() {
|
||||
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with JSON", func() {
|
||||
content := "Before <think>{\"key\": \"value\"}</think> After"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with HTML-like content", func() {
|
||||
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has reasoning mixed with regular content", func() {
|
||||
It("should preserve content order correctly", func() {
|
||||
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(ContainSubstring("Reasoning"))
|
||||
Expect(reasoning).To(ContainSubstring("More reasoning"))
|
||||
Expect(cleaned).To(Equal("Start Middle End"))
|
||||
})
|
||||
|
||||
It("should handle reasoning in the middle of a sentence", func() {
|
||||
content := "This is a <thinking>reasoning</thinking> sentence."
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("reasoning"))
|
||||
Expect(cleaned).To(Equal("This is a sentence."))
|
||||
})
|
||||
})
|
||||
|
||||
Context("edge cases", func() {
|
||||
It("should handle content with only opening tag", func() {
|
||||
content := "<thinking>"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(""))
|
||||
})
|
||||
|
||||
It("should handle content with only closing tag", func() {
|
||||
content := "</thinking>"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("</thinking>"))
|
||||
})
|
||||
|
||||
It("should handle mismatched tags", func() {
|
||||
content := "<thinking>Content</think>"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
// Should extract unclosed thinking block
|
||||
Expect(reasoning).To(ContainSubstring("Content"))
|
||||
Expect(cleaned).To(Equal(""))
|
||||
})
|
||||
|
||||
It("should handle very long reasoning content", func() {
|
||||
longReasoning := strings.Repeat("This is reasoning content. ", 100)
|
||||
content := "Text <thinking>" + longReasoning + "</thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
// TrimSpace is applied, so we need to account for that
|
||||
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with unicode characters", func() {
|
||||
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
|
||||
reasoning, cleaned := ExtractReasoning(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
})
|
||||
8
pkg/reasoning/config.go
Normal file
8
pkg/reasoning/config.go
Normal file
@@ -0,0 +1,8 @@
|
||||
package reasoning
|
||||
|
||||
type ReasoningConfig struct {
|
||||
// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
|
||||
// When true, all content from the start is treated as reasoning until a closing tag is found.
|
||||
// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
|
||||
ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
|
||||
}
|
||||
18
pkg/reasoning/options.go
Normal file
18
pkg/reasoning/options.go
Normal file
@@ -0,0 +1,18 @@
|
||||
package reasoning
|
||||
|
||||
// options holds the configuration for reasoning extraction
|
||||
type options struct {
|
||||
thinkingForcedOpen bool
|
||||
}
|
||||
|
||||
// Option is a functional option for configuring reasoning extraction
|
||||
type Option func(*options)
|
||||
|
||||
// WithThinkingForcedOpen configures the extractor to treat all content from the start
|
||||
// as reasoning until a closing tag is found. This is useful for models like GLM-4
|
||||
// that output reasoning without <think> but end with </think>.
|
||||
func WithThinkingForcedOpen() Option {
|
||||
return func(o *options) {
|
||||
o.thinkingForcedOpen = true
|
||||
}
|
||||
}
|
||||
256
pkg/reasoning/reasoning.go
Normal file
256
pkg/reasoning/reasoning.go
Normal file
@@ -0,0 +1,256 @@
|
||||
package reasoning
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Common thinking/reasoning opening tags used by various models.
|
||||
// These match the tags detected by llama.cpp in common/chat.cpp
|
||||
var thinkingOpenTags = []string{
|
||||
// DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE
|
||||
"<think>\n",
|
||||
"<think>",
|
||||
// Generic thinking tags
|
||||
"<thinking>\n",
|
||||
"<thinking>",
|
||||
// Apertus
|
||||
"<|inner_prefix|>",
|
||||
// Command R7B
|
||||
"<|START_THINKING|>",
|
||||
// Seed
|
||||
"<seed:think>",
|
||||
// Magistral (not in llama.cpp but common)
|
||||
"[THINK]\n",
|
||||
"[THINK]",
|
||||
}
|
||||
|
||||
// DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag.
|
||||
// This is used to automatically detect when the model template has already added
|
||||
// the opening thinking tag, meaning the model will output reasoning content directly.
|
||||
// Returns true if the prompt ends with a known thinking opening tag.
|
||||
func DetectThinkingForcedOpen(prompt string) bool {
|
||||
for _, tag := range thinkingOpenTags {
|
||||
if strings.HasSuffix(prompt, tag) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Extract extracts reasoning content from thinking tags and returns
|
||||
// both the extracted reasoning and the cleaned content (with tags removed).
|
||||
// It handles <thinking>...</thinking> and <think>...</think> tags.
|
||||
// Multiple reasoning blocks are concatenated with newlines.
|
||||
// It also handles the case where only a closing tag is present (no opening tag),
|
||||
// in which case everything before the closing tag is treated as reasoning.
|
||||
//
|
||||
// Use WithThinkingForcedOpen() option when all content from the start should be
|
||||
// treated as reasoning until a closing tag is found.
|
||||
func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) {
|
||||
if content == "" {
|
||||
return "", content
|
||||
}
|
||||
|
||||
cfg := &options{}
|
||||
for _, opt := range opts {
|
||||
opt(cfg)
|
||||
}
|
||||
|
||||
if cfg.thinkingForcedOpen {
|
||||
return extractForcedOpen(content)
|
||||
}
|
||||
|
||||
return extractFromTags(content)
|
||||
}
|
||||
|
||||
// extractForcedOpen handles the case where reasoning starts without an opening tag.
|
||||
// All content from the start is treated as reasoning until a closing tag is found.
|
||||
func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
|
||||
// Look for the earliest closing tag
|
||||
// These match the closing tags used by llama.cpp for various models
|
||||
closingTags := []string{
|
||||
"</thinking>",
|
||||
"</think>",
|
||||
"<|END_THINKING|>", // Command R7B
|
||||
"<|inner_suffix|>", // Apertus
|
||||
"</seed:think>", // Seed
|
||||
"[/THINK]", // Magistral
|
||||
}
|
||||
|
||||
earliestCloseIdx := -1
|
||||
var matchedCloseTag string
|
||||
|
||||
for _, closeTag := range closingTags {
|
||||
idx := strings.Index(content, closeTag)
|
||||
if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) {
|
||||
earliestCloseIdx = idx
|
||||
matchedCloseTag = closeTag
|
||||
}
|
||||
}
|
||||
|
||||
if earliestCloseIdx == -1 {
|
||||
// No closing tag found - all content is reasoning (still streaming)
|
||||
return strings.TrimSpace(content), ""
|
||||
}
|
||||
|
||||
// Found closing tag - everything before is reasoning, everything after is content
|
||||
reasoning = strings.TrimSpace(content[:earliestCloseIdx])
|
||||
cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):]
|
||||
|
||||
// Continue processing the rest for any additional reasoning blocks
|
||||
if cleanedContent != "" {
|
||||
additionalReasoning, finalContent := extractFromTags(cleanedContent)
|
||||
if additionalReasoning != "" {
|
||||
if reasoning != "" {
|
||||
reasoning = reasoning + "\n\n" + additionalReasoning
|
||||
} else {
|
||||
reasoning = additionalReasoning
|
||||
}
|
||||
}
|
||||
cleanedContent = finalContent
|
||||
}
|
||||
|
||||
return reasoning, cleanedContent
|
||||
}
|
||||
|
||||
// extractFromTags extracts reasoning content from thinking tags.
|
||||
// This is the core implementation that handles standard tag-based extraction.
|
||||
func extractFromTags(content string) (reasoning string, cleanedContent string) {
|
||||
if content == "" {
|
||||
return "", content
|
||||
}
|
||||
|
||||
var reasoningParts []string
|
||||
var cleanedParts []string
|
||||
remaining := content
|
||||
|
||||
// Define tag pairs to look for
|
||||
// These match the tags used by llama.cpp for various models
|
||||
tagPairs := []struct {
|
||||
start string
|
||||
end string
|
||||
}{
|
||||
{"<thinking>", "</thinking>"},
|
||||
{"<think>", "</think>"},
|
||||
{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B
|
||||
{"<|inner_prefix|>", "<|inner_suffix|>"}, // Apertus
|
||||
{"<seed:think>", "</seed:think>"}, // Seed
|
||||
{"[THINK]", "[/THINK]"}, // Magistral
|
||||
}
|
||||
|
||||
// Track the last position we've processed
|
||||
lastPos := 0
|
||||
|
||||
for {
|
||||
// Find the earliest tag start
|
||||
earliestStart := -1
|
||||
earliestEnd := -1
|
||||
isUnclosed := false
|
||||
isClosingOnly := false
|
||||
var matchedTag struct {
|
||||
start string
|
||||
end string
|
||||
}
|
||||
|
||||
for _, tagPair := range tagPairs {
|
||||
startIdx := strings.Index(remaining[lastPos:], tagPair.start)
|
||||
endIdx := strings.Index(remaining[lastPos:], tagPair.end)
|
||||
|
||||
// Check for closing-only tag (closing tag appears before or without opening tag)
|
||||
if endIdx != -1 && (startIdx == -1 || endIdx < startIdx) {
|
||||
// Found a closing tag without a preceding opening tag
|
||||
closingTagPos := endIdx + lastPos
|
||||
if earliestStart == -1 || closingTagPos < earliestStart || (isClosingOnly && closingTagPos < earliestEnd) {
|
||||
earliestStart = lastPos
|
||||
earliestEnd = closingTagPos + len(tagPair.end)
|
||||
isClosingOnly = true
|
||||
isUnclosed = false
|
||||
matchedTag = tagPair
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if startIdx == -1 {
|
||||
continue
|
||||
}
|
||||
startIdx += lastPos
|
||||
|
||||
// Find the corresponding end tag after the start tag
|
||||
endIdxAfterStart := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
|
||||
if endIdxAfterStart == -1 {
|
||||
// Unclosed tag - extract what we have
|
||||
if earliestStart == -1 || startIdx < earliestStart {
|
||||
earliestStart = startIdx
|
||||
earliestEnd = len(remaining)
|
||||
isUnclosed = true
|
||||
isClosingOnly = false
|
||||
matchedTag = tagPair
|
||||
}
|
||||
continue
|
||||
}
|
||||
endIdxAfterStart += startIdx + len(tagPair.start)
|
||||
|
||||
// Found a complete tag pair
|
||||
if earliestStart == -1 || startIdx < earliestStart {
|
||||
earliestStart = startIdx
|
||||
earliestEnd = endIdxAfterStart + len(tagPair.end)
|
||||
isUnclosed = false
|
||||
isClosingOnly = false
|
||||
matchedTag = tagPair
|
||||
}
|
||||
}
|
||||
|
||||
if earliestStart == -1 {
|
||||
// No more tags found, add remaining content
|
||||
if lastPos < len(remaining) {
|
||||
cleanedParts = append(cleanedParts, remaining[lastPos:])
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if isClosingOnly {
|
||||
// Closing tag without opening tag - content before closing tag is reasoning
|
||||
reasoningContent := strings.TrimSpace(remaining[lastPos : earliestEnd-len(matchedTag.end)])
|
||||
if reasoningContent != "" {
|
||||
reasoningParts = append(reasoningParts, reasoningContent)
|
||||
}
|
||||
// Move past the closing tag
|
||||
lastPos = earliestEnd
|
||||
continue
|
||||
}
|
||||
|
||||
// Add content before the tag
|
||||
if earliestStart > lastPos {
|
||||
cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
|
||||
}
|
||||
|
||||
// Extract reasoning content
|
||||
reasoningStart := earliestStart + len(matchedTag.start)
|
||||
// For unclosed tags, earliestEnd is already at the end of the string
|
||||
// For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length
|
||||
var reasoningEnd int
|
||||
if isUnclosed {
|
||||
// Unclosed tag - extract everything to the end
|
||||
reasoningEnd = len(remaining)
|
||||
} else {
|
||||
// Closed tag - exclude the end tag
|
||||
reasoningEnd = earliestEnd - len(matchedTag.end)
|
||||
}
|
||||
if reasoningEnd > reasoningStart {
|
||||
reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd])
|
||||
if reasoningContent != "" {
|
||||
reasoningParts = append(reasoningParts, reasoningContent)
|
||||
}
|
||||
}
|
||||
|
||||
// Move past this tag
|
||||
lastPos = earliestEnd
|
||||
}
|
||||
|
||||
// Combine reasoning parts
|
||||
reasoning = strings.Join(reasoningParts, "\n\n")
|
||||
// Combine cleaned content parts
|
||||
cleanedContent = strings.Join(cleanedParts, "")
|
||||
|
||||
return reasoning, cleanedContent
|
||||
}
|
||||
13
pkg/reasoning/reasoning_suite_test.go
Normal file
13
pkg/reasoning/reasoning_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package reasoning_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestReasoning(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Reasoning Suite")
|
||||
}
|
||||
499
pkg/reasoning/reasoning_test.go
Normal file
499
pkg/reasoning/reasoning_test.go
Normal file
@@ -0,0 +1,499 @@
|
||||
package reasoning_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
. "github.com/mudler/LocalAI/pkg/reasoning"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("DetectThinkingForcedOpen", func() {
|
||||
It("should detect <think> at end of prompt", func() {
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<think>")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<think>\n")).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should detect <thinking> at end of prompt", func() {
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<thinking>")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<thinking>\n")).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should detect model-specific tags", func() {
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt<seed:think>")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue())
|
||||
Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should not detect if tag is in the middle", func() {
|
||||
Expect(DetectThinkingForcedOpen("Some <think> prompt")).To(BeFalse())
|
||||
Expect(DetectThinkingForcedOpen("<think>reasoning</think>")).To(BeFalse())
|
||||
})
|
||||
|
||||
It("should not detect if no thinking tag", func() {
|
||||
Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse())
|
||||
Expect(DetectThinkingForcedOpen("")).To(BeFalse())
|
||||
})
|
||||
})
|
||||
|
||||
var _ = Describe("Extract", func() {
|
||||
Context("when content has no reasoning tags", func() {
|
||||
It("should return empty reasoning and original content", func() {
|
||||
content := "This is regular content without any tags."
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(content))
|
||||
})
|
||||
|
||||
It("should handle empty string", func() {
|
||||
content := ""
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should handle content with only whitespace", func() {
|
||||
content := " \n\t "
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(content))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <thinking> tags", func() {
|
||||
It("should extract reasoning from single thinking block", func() {
|
||||
content := "Some text <thinking>This is my reasoning</thinking> More text"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("This is my reasoning"))
|
||||
Expect(cleaned).To(Equal("Some text More text"))
|
||||
})
|
||||
|
||||
It("should extract reasoning and preserve surrounding content", func() {
|
||||
content := "Before <thinking>Reasoning here</thinking> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle thinking block at the start", func() {
|
||||
content := "<thinking>Start reasoning</thinking> Regular content"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Start reasoning"))
|
||||
Expect(cleaned).To(Equal(" Regular content"))
|
||||
})
|
||||
|
||||
It("should handle thinking block at the end", func() {
|
||||
content := "Regular content <thinking>End reasoning</thinking>"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("End reasoning"))
|
||||
Expect(cleaned).To(Equal("Regular content "))
|
||||
})
|
||||
|
||||
It("should handle only thinking block", func() {
|
||||
content := "<thinking>Only reasoning</thinking>"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Only reasoning"))
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should trim whitespace from reasoning content", func() {
|
||||
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with spaces"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has <think> tags", func() {
|
||||
It("should extract reasoning from redacted_reasoning block", func() {
|
||||
content := "Text <think>Redacted reasoning</think> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Redacted reasoning"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle redacted_reasoning with multiline content", func() {
|
||||
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle redacted_reasoning with complex content", func() {
|
||||
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
|
||||
Expect(cleaned).To(Equal("Start End"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has multiple reasoning blocks", func() {
|
||||
It("should concatenate multiple thinking blocks with newlines", func() {
|
||||
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("First\n\nSecond"))
|
||||
Expect(cleaned).To(Equal("Text Middle End"))
|
||||
})
|
||||
|
||||
It("should handle multiple different tag types", func() {
|
||||
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(ContainSubstring("One"))
|
||||
Expect(reasoning).To(ContainSubstring("Two"))
|
||||
Expect(reasoning).To(ContainSubstring("Three"))
|
||||
Expect(cleaned).To(Equal("A B C D"))
|
||||
})
|
||||
|
||||
It("should handle nested tags correctly (extracts first match)", func() {
|
||||
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
// Should extract the outer thinking block
|
||||
Expect(reasoning).To(ContainSubstring("Outer"))
|
||||
Expect(reasoning).To(ContainSubstring("Inner"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has unclosed reasoning tags", func() {
|
||||
It("should extract unclosed thinking block", func() {
|
||||
content := "Text <thinking>Unclosed reasoning"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Unclosed reasoning"))
|
||||
Expect(cleaned).To(Equal("Text "))
|
||||
})
|
||||
|
||||
It("should extract unclosed think block", func() {
|
||||
content := "Before <think>Incomplete"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Incomplete"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should extract unclosed redacted_reasoning block", func() {
|
||||
content := "Start <think>Partial reasoning content"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Partial reasoning content"))
|
||||
Expect(cleaned).To(Equal("Start "))
|
||||
})
|
||||
|
||||
It("should handle unclosed tag at the end", func() {
|
||||
content := "Regular content <thinking>Unclosed at end"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Unclosed at end"))
|
||||
Expect(cleaned).To(Equal("Regular content "))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has empty reasoning blocks", func() {
|
||||
It("should ignore empty thinking block", func() {
|
||||
content := "Text <thinking></thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should ignore thinking block with only whitespace", func() {
|
||||
content := "Text <thinking> \n\t </thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has reasoning tags with special characters", func() {
|
||||
It("should handle reasoning with newlines", func() {
|
||||
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with code blocks", func() {
|
||||
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with JSON", func() {
|
||||
content := "Before <think>{\"key\": \"value\"}</think> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with HTML-like content", func() {
|
||||
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("when content has reasoning mixed with regular content", func() {
|
||||
It("should preserve content order correctly", func() {
|
||||
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(ContainSubstring("Reasoning"))
|
||||
Expect(reasoning).To(ContainSubstring("More reasoning"))
|
||||
Expect(cleaned).To(Equal("Start Middle End"))
|
||||
})
|
||||
|
||||
It("should handle reasoning in the middle of a sentence", func() {
|
||||
content := "This is a <thinking>reasoning</thinking> sentence."
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning"))
|
||||
Expect(cleaned).To(Equal("This is a sentence."))
|
||||
})
|
||||
})
|
||||
|
||||
Context("edge cases without WithThinkingForcedOpen", func() {
|
||||
It("should handle content with only opening tag", func() {
|
||||
content := "<thinking>"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal(""))
|
||||
})
|
||||
|
||||
It("should handle content with only closing tag (no content before)", func() {
|
||||
content := "</thinking>"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should extract reasoning when only closing tag is present", func() {
|
||||
// GLM-4 style: reasoning content followed by closing tag without opening tag
|
||||
content := "This is reasoning content</think>this is the actual response"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("This is reasoning content"))
|
||||
Expect(cleaned).To(Equal("this is the actual response"))
|
||||
})
|
||||
|
||||
It("should handle closing-only tag with multiline reasoning", func() {
|
||||
content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
|
||||
Expect(cleaned).To(Equal("Final answer"))
|
||||
})
|
||||
|
||||
It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
|
||||
content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
|
||||
Expect(cleaned).To(Equal("this is a test"))
|
||||
})
|
||||
|
||||
It("should handle closing-only thinking tag", func() {
|
||||
content := "Some reasoning here</thinking>actual content"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Some reasoning here"))
|
||||
Expect(cleaned).To(Equal("actual content"))
|
||||
})
|
||||
|
||||
It("should handle mismatched tags", func() {
|
||||
content := "<thinking>Content</think>"
|
||||
reasoning, cleaned := Extract(content)
|
||||
// Should extract unclosed thinking block
|
||||
Expect(reasoning).To(ContainSubstring("Content"))
|
||||
Expect(cleaned).To(Equal(""))
|
||||
})
|
||||
|
||||
It("should handle very long reasoning content", func() {
|
||||
longReasoning := strings.Repeat("This is reasoning content. ", 100)
|
||||
content := "Text <thinking>" + longReasoning + "</thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
// TrimSpace is applied, so we need to account for that
|
||||
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
|
||||
It("should handle reasoning with unicode characters", func() {
|
||||
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
|
||||
Expect(cleaned).To(Equal("Text More"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("with WithThinkingForcedOpen option", func() {
|
||||
It("should treat all content as reasoning until closing tag", func() {
|
||||
content := "This is reasoning</think>this is content"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("This is reasoning"))
|
||||
Expect(cleaned).To(Equal("this is content"))
|
||||
})
|
||||
|
||||
It("should treat all content as reasoning when no closing tag (streaming)", func() {
|
||||
content := "This is reasoning content still streaming"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("This is reasoning content still streaming"))
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should handle GLM-4 style output", func() {
|
||||
content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
|
||||
Expect(cleaned).To(Equal("this is a test"))
|
||||
})
|
||||
|
||||
It("should handle multiline reasoning with closing tag", func() {
|
||||
content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
|
||||
Expect(cleaned).To(Equal("Final answer"))
|
||||
})
|
||||
|
||||
It("should handle </thinking> closing tag", func() {
|
||||
content := "Some reasoning here</thinking>actual content"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Some reasoning here"))
|
||||
Expect(cleaned).To(Equal("actual content"))
|
||||
})
|
||||
|
||||
It("should handle additional reasoning blocks after initial forced open", func() {
|
||||
content := "Initial reasoning</think>content<think>more reasoning</think>final content"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning"))
|
||||
Expect(cleaned).To(Equal("contentfinal content"))
|
||||
})
|
||||
|
||||
It("should handle empty content", func() {
|
||||
reasoning, cleaned := Extract("", WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(BeEmpty())
|
||||
})
|
||||
|
||||
It("should handle only closing tag", func() {
|
||||
content := "</think>only content"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(BeEmpty())
|
||||
Expect(cleaned).To(Equal("only content"))
|
||||
})
|
||||
|
||||
It("should find earliest closing tag", func() {
|
||||
// </think> comes before </thinking>
|
||||
content := "Reasoning</think>content</thinking>more"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Reasoning"))
|
||||
Expect(cleaned).To(Equal("content</thinking>more"))
|
||||
})
|
||||
|
||||
It("should handle Command R7B closing tag", func() {
|
||||
content := "Reasoning content<|END_THINKING|>actual response"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle Apertus closing tag", func() {
|
||||
content := "Reasoning content<|inner_suffix|>actual response"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle Seed closing tag", func() {
|
||||
content := "Reasoning content</seed:think>actual response"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle Magistral closing tag", func() {
|
||||
content := "Reasoning content[/THINK]actual response"
|
||||
reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
})
|
||||
|
||||
Context("with model-specific tag pairs", func() {
|
||||
It("should extract Command R7B reasoning tags", func() {
|
||||
content := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should extract Apertus reasoning tags", func() {
|
||||
content := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should extract Seed reasoning tags", func() {
|
||||
content := "Before <seed:think>reasoning here</seed:think> After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should extract Magistral reasoning tags", func() {
|
||||
content := "Before [THINK]reasoning here[/THINK] After"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning here"))
|
||||
Expect(cleaned).To(Equal("Before After"))
|
||||
})
|
||||
|
||||
It("should handle unclosed Command R7B tag", func() {
|
||||
content := "Before <|START_THINKING|>reasoning still streaming"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning still streaming"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should handle unclosed Apertus tag", func() {
|
||||
content := "Before <|inner_prefix|>reasoning still streaming"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning still streaming"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should handle unclosed Seed tag", func() {
|
||||
content := "Before <seed:think>reasoning still streaming"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning still streaming"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should handle unclosed Magistral tag", func() {
|
||||
content := "Before [THINK]reasoning still streaming"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("reasoning still streaming"))
|
||||
Expect(cleaned).To(Equal("Before "))
|
||||
})
|
||||
|
||||
It("should handle closing-only Command R7B tag", func() {
|
||||
content := "Reasoning content<|END_THINKING|>actual response"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle closing-only Apertus tag", func() {
|
||||
content := "Reasoning content<|inner_suffix|>actual response"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle closing-only Seed tag", func() {
|
||||
content := "Reasoning content</seed:think>actual response"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
|
||||
It("should handle closing-only Magistral tag", func() {
|
||||
content := "Reasoning content[/THINK]actual response"
|
||||
reasoning, cleaned := Extract(content)
|
||||
Expect(reasoning).To(Equal("Reasoning content"))
|
||||
Expect(cleaned).To(Equal("actual response"))
|
||||
})
|
||||
})
|
||||
})
|
||||
534
swagger/docs.go
534
swagger/docs.go
@@ -1259,6 +1259,116 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses": {
|
||||
"post": {
|
||||
"summary": "Create a response using the Open Responses API",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "Request body",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.OpenResponsesRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses/{id}": {
|
||||
"get": {
|
||||
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
|
||||
"summary": "Get a response by ID",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Response ID",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Set to 'true' to resume streaming",
|
||||
"name": "stream",
|
||||
"in": "query"
|
||||
},
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Sequence number to resume from (for streaming)",
|
||||
"name": "starting_after",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Not Found",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses/{id}/cancel": {
|
||||
"post": {
|
||||
"description": "Cancel a background response if it's still in progress",
|
||||
"summary": "Cancel a response",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Response ID",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Not Found",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/sound-generation": {
|
||||
"post": {
|
||||
"summary": "Generates audio from the input text.",
|
||||
@@ -2507,6 +2617,322 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORError": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORFunctionTool": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"strict": {
|
||||
"description": "Always include in response",
|
||||
"type": "boolean"
|
||||
},
|
||||
"type": {
|
||||
"description": "always \"function\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORIncompleteDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reason": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORInputTokensDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"description": "Always include, even if 0",
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORItemField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
},
|
||||
"call_id": {
|
||||
"description": "Function call fields",
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"description": "string or []ORContentPart for messages"
|
||||
},
|
||||
"id": {
|
||||
"description": "Present for all output items",
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"output": {
|
||||
"description": "Function call output fields"
|
||||
},
|
||||
"role": {
|
||||
"description": "Message fields",
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"description": "in_progress|completed|incomplete",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "message|function_call|function_call_output|reasoning|item_reference",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OROutputTokensDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"description": "Always include, even if 0",
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORReasoning": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"effort": {
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORReasoningParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"effort": {
|
||||
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "\"auto\"|\"concise\"|\"detailed\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORResponseResource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"background": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"completed_at": {
|
||||
"description": "Required: present as number or null",
|
||||
"type": "integer"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error": {
|
||||
"description": "Always present, null if no error",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORError"
|
||||
}
|
||||
]
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"incomplete_details": {
|
||||
"description": "Always present, null if complete",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORIncompleteDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_tool_calls": {
|
||||
"description": "nullable",
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata and operational flags",
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"description": "always \"response\"",
|
||||
"type": "string"
|
||||
},
|
||||
"output": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORItemField"
|
||||
}
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"presence_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"prompt_cache_key": {
|
||||
"description": "nullable",
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"description": "nullable",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORReasoning"
|
||||
}
|
||||
]
|
||||
},
|
||||
"safety_identifier": {
|
||||
"description": "Safety and caching",
|
||||
"type": "string"
|
||||
},
|
||||
"service_tier": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"description": "in_progress|completed|failed|incomplete",
|
||||
"type": "string"
|
||||
},
|
||||
"store": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"temperature": {
|
||||
"description": "Sampling parameters (always required)",
|
||||
"type": "number"
|
||||
},
|
||||
"text": {
|
||||
"description": "Text format configuration",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORTextConfig"
|
||||
}
|
||||
]
|
||||
},
|
||||
"tool_choice": {},
|
||||
"tools": {
|
||||
"description": "Tool-related fields",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||
}
|
||||
},
|
||||
"top_logprobs": {
|
||||
"description": "Default to 0",
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"description": "Truncation and reasoning",
|
||||
"type": "string"
|
||||
},
|
||||
"usage": {
|
||||
"description": "Usage statistics",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORUsage"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORTextConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"format": {
|
||||
"$ref": "#/definitions/schema.ORTextFormat"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORTextFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"description": "\"text\" or \"json_schema\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"description": "Always present",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORInputTokensDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"description": "Always present",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.OROutputTokensDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OpenAIModel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -2781,6 +3207,114 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OpenResponsesRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"allowed_tools": {
|
||||
"description": "Restrict which tools can be invoked",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"background": {
|
||||
"description": "Run request in background",
|
||||
"type": "boolean"
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"description": "Frequency penalty (-2.0 to 2.0)",
|
||||
"type": "number"
|
||||
},
|
||||
"include": {
|
||||
"description": "What to include in response",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"input": {
|
||||
"description": "string or []ORItemParam"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
},
|
||||
"logit_bias": {
|
||||
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number",
|
||||
"format": "float64"
|
||||
}
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_tool_calls": {
|
||||
"description": "Maximum number of tool calls",
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"description": "Allow parallel tool calls",
|
||||
"type": "boolean"
|
||||
},
|
||||
"presence_penalty": {
|
||||
"description": "Presence penalty (-2.0 to 2.0)",
|
||||
"type": "number"
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"$ref": "#/definitions/schema.ORReasoningParam"
|
||||
},
|
||||
"service_tier": {
|
||||
"description": "\"auto\"|\"default\"|priority hint",
|
||||
"type": "string"
|
||||
},
|
||||
"store": {
|
||||
"description": "Whether to store the response",
|
||||
"type": "boolean"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"text_format": {
|
||||
"description": "Additional parameters from spec"
|
||||
},
|
||||
"tool_choice": {
|
||||
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||
}
|
||||
},
|
||||
"top_logprobs": {
|
||||
"description": "Number of top logprobs to return",
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"description": "\"auto\"|\"disabled\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.P2PNodesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -1252,6 +1252,116 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses": {
|
||||
"post": {
|
||||
"summary": "Create a response using the Open Responses API",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "Request body",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.OpenResponsesRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses/{id}": {
|
||||
"get": {
|
||||
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
|
||||
"summary": "Get a response by ID",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Response ID",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Set to 'true' to resume streaming",
|
||||
"name": "stream",
|
||||
"in": "query"
|
||||
},
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Sequence number to resume from (for streaming)",
|
||||
"name": "starting_after",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Not Found",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/responses/{id}/cancel": {
|
||||
"post": {
|
||||
"description": "Cancel a background response if it's still in progress",
|
||||
"summary": "Cancel a response",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Response ID",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.ORResponseResource"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Not Found",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/sound-generation": {
|
||||
"post": {
|
||||
"summary": "Generates audio from the input text.",
|
||||
@@ -2500,6 +2610,322 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORError": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORFunctionTool": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"strict": {
|
||||
"description": "Always include in response",
|
||||
"type": "boolean"
|
||||
},
|
||||
"type": {
|
||||
"description": "always \"function\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORIncompleteDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reason": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORInputTokensDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"description": "Always include, even if 0",
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORItemField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
},
|
||||
"call_id": {
|
||||
"description": "Function call fields",
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"description": "string or []ORContentPart for messages"
|
||||
},
|
||||
"id": {
|
||||
"description": "Present for all output items",
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"output": {
|
||||
"description": "Function call output fields"
|
||||
},
|
||||
"role": {
|
||||
"description": "Message fields",
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"description": "in_progress|completed|incomplete",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "message|function_call|function_call_output|reasoning|item_reference",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OROutputTokensDetails": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"description": "Always include, even if 0",
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORReasoning": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"effort": {
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORReasoningParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"effort": {
|
||||
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "\"auto\"|\"concise\"|\"detailed\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORResponseResource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"background": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"completed_at": {
|
||||
"description": "Required: present as number or null",
|
||||
"type": "integer"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error": {
|
||||
"description": "Always present, null if no error",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORError"
|
||||
}
|
||||
]
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"incomplete_details": {
|
||||
"description": "Always present, null if complete",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORIncompleteDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_tool_calls": {
|
||||
"description": "nullable",
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata and operational flags",
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"description": "always \"response\"",
|
||||
"type": "string"
|
||||
},
|
||||
"output": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORItemField"
|
||||
}
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"presence_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"prompt_cache_key": {
|
||||
"description": "nullable",
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"description": "nullable",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORReasoning"
|
||||
}
|
||||
]
|
||||
},
|
||||
"safety_identifier": {
|
||||
"description": "Safety and caching",
|
||||
"type": "string"
|
||||
},
|
||||
"service_tier": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"description": "in_progress|completed|failed|incomplete",
|
||||
"type": "string"
|
||||
},
|
||||
"store": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"temperature": {
|
||||
"description": "Sampling parameters (always required)",
|
||||
"type": "number"
|
||||
},
|
||||
"text": {
|
||||
"description": "Text format configuration",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORTextConfig"
|
||||
}
|
||||
]
|
||||
},
|
||||
"tool_choice": {},
|
||||
"tools": {
|
||||
"description": "Tool-related fields",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||
}
|
||||
},
|
||||
"top_logprobs": {
|
||||
"description": "Default to 0",
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"description": "Truncation and reasoning",
|
||||
"type": "string"
|
||||
},
|
||||
"usage": {
|
||||
"description": "Usage statistics",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORUsage"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORTextConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"format": {
|
||||
"$ref": "#/definitions/schema.ORTextFormat"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORTextFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"description": "\"text\" or \"json_schema\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ORUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"description": "Always present",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.ORInputTokensDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"description": "Always present",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/schema.OROutputTokensDetails"
|
||||
}
|
||||
]
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OpenAIModel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -2774,6 +3200,114 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.OpenResponsesRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"allowed_tools": {
|
||||
"description": "Restrict which tools can be invoked",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"background": {
|
||||
"description": "Run request in background",
|
||||
"type": "boolean"
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"description": "Frequency penalty (-2.0 to 2.0)",
|
||||
"type": "number"
|
||||
},
|
||||
"include": {
|
||||
"description": "What to include in response",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"input": {
|
||||
"description": "string or []ORItemParam"
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
},
|
||||
"logit_bias": {
|
||||
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number",
|
||||
"format": "float64"
|
||||
}
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_tool_calls": {
|
||||
"description": "Maximum number of tool calls",
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"description": "Allow parallel tool calls",
|
||||
"type": "boolean"
|
||||
},
|
||||
"presence_penalty": {
|
||||
"description": "Presence penalty (-2.0 to 2.0)",
|
||||
"type": "number"
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"$ref": "#/definitions/schema.ORReasoningParam"
|
||||
},
|
||||
"service_tier": {
|
||||
"description": "\"auto\"|\"default\"|priority hint",
|
||||
"type": "string"
|
||||
},
|
||||
"store": {
|
||||
"description": "Whether to store the response",
|
||||
"type": "boolean"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"text_format": {
|
||||
"description": "Additional parameters from spec"
|
||||
},
|
||||
"tool_choice": {
|
||||
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||
}
|
||||
},
|
||||
"top_logprobs": {
|
||||
"description": "Number of top logprobs to return",
|
||||
"type": "integer"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"description": "\"auto\"|\"disabled\"",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.P2PNodesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -742,6 +742,212 @@ definitions:
|
||||
tunnelAddress:
|
||||
type: string
|
||||
type: object
|
||||
schema.ORError:
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
param:
|
||||
type: string
|
||||
type:
|
||||
description: invalid_request|not_found|server_error|model_error|too_many_requests
|
||||
type: string
|
||||
type: object
|
||||
schema.ORFunctionTool:
|
||||
properties:
|
||||
description:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
parameters:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
strict:
|
||||
description: Always include in response
|
||||
type: boolean
|
||||
type:
|
||||
description: always "function"
|
||||
type: string
|
||||
type: object
|
||||
schema.ORIncompleteDetails:
|
||||
properties:
|
||||
reason:
|
||||
type: string
|
||||
type: object
|
||||
schema.ORInputTokensDetails:
|
||||
properties:
|
||||
cached_tokens:
|
||||
description: Always include, even if 0
|
||||
type: integer
|
||||
type: object
|
||||
schema.ORItemField:
|
||||
properties:
|
||||
arguments:
|
||||
type: string
|
||||
call_id:
|
||||
description: Function call fields
|
||||
type: string
|
||||
content:
|
||||
description: string or []ORContentPart for messages
|
||||
id:
|
||||
description: Present for all output items
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
output:
|
||||
description: Function call output fields
|
||||
role:
|
||||
description: Message fields
|
||||
type: string
|
||||
status:
|
||||
description: in_progress|completed|incomplete
|
||||
type: string
|
||||
type:
|
||||
description: message|function_call|function_call_output|reasoning|item_reference
|
||||
type: string
|
||||
type: object
|
||||
schema.OROutputTokensDetails:
|
||||
properties:
|
||||
reasoning_tokens:
|
||||
description: Always include, even if 0
|
||||
type: integer
|
||||
type: object
|
||||
schema.ORReasoning:
|
||||
properties:
|
||||
effort:
|
||||
type: string
|
||||
summary:
|
||||
type: string
|
||||
type: object
|
||||
schema.ORReasoningParam:
|
||||
properties:
|
||||
effort:
|
||||
description: '"none"|"low"|"medium"|"high"|"xhigh"'
|
||||
type: string
|
||||
summary:
|
||||
description: '"auto"|"concise"|"detailed"'
|
||||
type: string
|
||||
type: object
|
||||
schema.ORResponseResource:
|
||||
properties:
|
||||
background:
|
||||
type: boolean
|
||||
completed_at:
|
||||
description: 'Required: present as number or null'
|
||||
type: integer
|
||||
created_at:
|
||||
type: integer
|
||||
error:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORError'
|
||||
description: Always present, null if no error
|
||||
frequency_penalty:
|
||||
type: number
|
||||
id:
|
||||
type: string
|
||||
incomplete_details:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORIncompleteDetails'
|
||||
description: Always present, null if complete
|
||||
instructions:
|
||||
type: string
|
||||
max_output_tokens:
|
||||
type: integer
|
||||
max_tool_calls:
|
||||
description: nullable
|
||||
type: integer
|
||||
metadata:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Metadata and operational flags
|
||||
type: object
|
||||
model:
|
||||
type: string
|
||||
object:
|
||||
description: always "response"
|
||||
type: string
|
||||
output:
|
||||
items:
|
||||
$ref: '#/definitions/schema.ORItemField'
|
||||
type: array
|
||||
parallel_tool_calls:
|
||||
type: boolean
|
||||
presence_penalty:
|
||||
type: number
|
||||
previous_response_id:
|
||||
type: string
|
||||
prompt_cache_key:
|
||||
description: nullable
|
||||
type: string
|
||||
reasoning:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORReasoning'
|
||||
description: nullable
|
||||
safety_identifier:
|
||||
description: Safety and caching
|
||||
type: string
|
||||
service_tier:
|
||||
type: string
|
||||
status:
|
||||
description: in_progress|completed|failed|incomplete
|
||||
type: string
|
||||
store:
|
||||
type: boolean
|
||||
temperature:
|
||||
description: Sampling parameters (always required)
|
||||
type: number
|
||||
text:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORTextConfig'
|
||||
description: Text format configuration
|
||||
tool_choice: {}
|
||||
tools:
|
||||
description: Tool-related fields
|
||||
items:
|
||||
$ref: '#/definitions/schema.ORFunctionTool'
|
||||
type: array
|
||||
top_logprobs:
|
||||
description: Default to 0
|
||||
type: integer
|
||||
top_p:
|
||||
type: number
|
||||
truncation:
|
||||
description: Truncation and reasoning
|
||||
type: string
|
||||
usage:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORUsage'
|
||||
description: Usage statistics
|
||||
type: object
|
||||
schema.ORTextConfig:
|
||||
properties:
|
||||
format:
|
||||
$ref: '#/definitions/schema.ORTextFormat'
|
||||
type: object
|
||||
schema.ORTextFormat:
|
||||
properties:
|
||||
type:
|
||||
description: '"text" or "json_schema"'
|
||||
type: string
|
||||
type: object
|
||||
schema.ORUsage:
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
input_tokens_details:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.ORInputTokensDetails'
|
||||
description: Always present
|
||||
output_tokens:
|
||||
type: integer
|
||||
output_tokens_details:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.OROutputTokensDetails'
|
||||
description: Always present
|
||||
total_tokens:
|
||||
type: integer
|
||||
type: object
|
||||
schema.OpenAIModel:
|
||||
properties:
|
||||
id:
|
||||
@@ -936,6 +1142,82 @@ definitions:
|
||||
total_tokens:
|
||||
type: integer
|
||||
type: object
|
||||
schema.OpenResponsesRequest:
|
||||
properties:
|
||||
allowed_tools:
|
||||
description: Restrict which tools can be invoked
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
background:
|
||||
description: Run request in background
|
||||
type: boolean
|
||||
frequency_penalty:
|
||||
description: Frequency penalty (-2.0 to 2.0)
|
||||
type: number
|
||||
include:
|
||||
description: What to include in response
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
input:
|
||||
description: string or []ORItemParam
|
||||
instructions:
|
||||
type: string
|
||||
logit_bias:
|
||||
additionalProperties:
|
||||
format: float64
|
||||
type: number
|
||||
description: OpenAI-compatible extensions (not in Open Responses spec)
|
||||
type: object
|
||||
max_output_tokens:
|
||||
type: integer
|
||||
max_tool_calls:
|
||||
description: Maximum number of tool calls
|
||||
type: integer
|
||||
metadata:
|
||||
additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
model:
|
||||
type: string
|
||||
parallel_tool_calls:
|
||||
description: Allow parallel tool calls
|
||||
type: boolean
|
||||
presence_penalty:
|
||||
description: Presence penalty (-2.0 to 2.0)
|
||||
type: number
|
||||
previous_response_id:
|
||||
type: string
|
||||
reasoning:
|
||||
$ref: '#/definitions/schema.ORReasoningParam'
|
||||
service_tier:
|
||||
description: '"auto"|"default"|priority hint'
|
||||
type: string
|
||||
store:
|
||||
description: Whether to store the response
|
||||
type: boolean
|
||||
stream:
|
||||
type: boolean
|
||||
temperature:
|
||||
type: number
|
||||
text_format:
|
||||
description: Additional parameters from spec
|
||||
tool_choice:
|
||||
description: '"auto"|"required"|"none"|{type:"function",name:"..."}'
|
||||
tools:
|
||||
items:
|
||||
$ref: '#/definitions/schema.ORFunctionTool'
|
||||
type: array
|
||||
top_logprobs:
|
||||
description: Number of top logprobs to return
|
||||
type: integer
|
||||
top_p:
|
||||
type: number
|
||||
truncation:
|
||||
description: '"auto"|"disabled"'
|
||||
type: string
|
||||
type: object
|
||||
schema.P2PNodesResponse:
|
||||
properties:
|
||||
federated_nodes:
|
||||
@@ -1962,6 +2244,80 @@ paths:
|
||||
schema:
|
||||
$ref: '#/definitions/schema.JINARerankResponse'
|
||||
summary: Reranks a list of phrases by relevance to a given text query.
|
||||
/v1/responses:
|
||||
post:
|
||||
parameters:
|
||||
- description: Request body
|
||||
in: body
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/schema.OpenResponsesRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/schema.ORResponseResource'
|
||||
summary: Create a response using the Open Responses API
|
||||
/v1/responses/{id}:
|
||||
get:
|
||||
description: Retrieve a response by ID. Can be used for polling background responses
|
||||
or resuming streaming responses.
|
||||
parameters:
|
||||
- description: Response ID
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: string
|
||||
- description: Set to 'true' to resume streaming
|
||||
in: query
|
||||
name: stream
|
||||
type: string
|
||||
- description: Sequence number to resume from (for streaming)
|
||||
in: query
|
||||
name: starting_after
|
||||
type: integer
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/schema.ORResponseResource'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
"404":
|
||||
description: Not Found
|
||||
schema:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
summary: Get a response by ID
|
||||
/v1/responses/{id}/cancel:
|
||||
post:
|
||||
description: Cancel a background response if it's still in progress
|
||||
parameters:
|
||||
- description: Response ID
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: string
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/schema.ORResponseResource'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
"404":
|
||||
description: Not Found
|
||||
schema:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
summary: Cancel a response
|
||||
/v1/sound-generation:
|
||||
post:
|
||||
parameters:
|
||||
|
||||
Reference in New Issue
Block a user