mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
23 Commits
test/ci
...
fix/reason
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
61a6e95f7d | ||
|
|
a352125726 | ||
|
|
187e474daf | ||
|
|
4bf2f8bbd8 | ||
|
|
d3525b7509 | ||
|
|
c8aa821e0e | ||
|
|
b3191927ae | ||
|
|
54c5a2d9ea | ||
|
|
0279591fec | ||
|
|
8845186955 | ||
|
|
ab8ed24358 | ||
|
|
a021df5a88 | ||
|
|
5f403b1631 | ||
|
|
897ad1729e | ||
|
|
16a18a2e55 | ||
|
|
3387bfaee0 | ||
|
|
1cd33047b4 | ||
|
|
1de045311a | ||
|
|
5fe9bf9f84 | ||
|
|
d4fd0c0609 | ||
|
|
d16722ee13 | ||
|
|
1f10ab39a9 | ||
|
|
4d36e393d1 |
23
.github/workflows/backend.yml
vendored
23
.github/workflows/backend.yml
vendored
@@ -137,7 +137,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
|
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'bigger-runner'
|
||||||
base-image: "ubuntu:24.04"
|
base-image: "ubuntu:24.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
backend: "llama-cpp"
|
backend: "llama-cpp"
|
||||||
@@ -699,7 +699,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
|
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'bigger-runner'
|
||||||
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
backend: "faster-whisper"
|
backend: "faster-whisper"
|
||||||
@@ -712,7 +712,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-rocm-hipblas-coqui'
|
tag-suffix: '-gpu-rocm-hipblas-coqui'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'bigger-runner'
|
||||||
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
backend: "coqui"
|
backend: "coqui"
|
||||||
@@ -963,7 +963,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-cpu-llama-cpp'
|
tag-suffix: '-cpu-llama-cpp'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'bigger-runner'
|
||||||
base-image: "ubuntu:24.04"
|
base-image: "ubuntu:24.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
backend: "llama-cpp"
|
backend: "llama-cpp"
|
||||||
@@ -989,7 +989,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-vulkan-llama-cpp'
|
tag-suffix: '-gpu-vulkan-llama-cpp'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'bigger-runner'
|
||||||
base-image: "ubuntu:24.04"
|
base-image: "ubuntu:24.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
backend: "llama-cpp"
|
backend: "llama-cpp"
|
||||||
@@ -1330,19 +1330,6 @@ jobs:
|
|||||||
dockerfile: "./backend/Dockerfile.python"
|
dockerfile: "./backend/Dockerfile.python"
|
||||||
context: "./"
|
context: "./"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
- build-type: 'l4t'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/arm64'
|
|
||||||
skip-drivers: 'true'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-nvidia-l4t-arm64-neutts'
|
|
||||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
|
||||||
runs-on: 'ubuntu-24.04-arm'
|
|
||||||
backend: "neutts"
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./"
|
|
||||||
ubuntu-version: '2204'
|
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
cuda-major-version: ""
|
cuda-major-version: ""
|
||||||
cuda-minor-version: ""
|
cuda-minor-version: ""
|
||||||
|
|||||||
10
.github/workflows/image-pr.yml
vendored
10
.github/workflows/image-pr.yml
vendored
@@ -41,7 +41,7 @@
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12'
|
tag-suffix: '-gpu-nvidia-cuda-12'
|
||||||
runs-on: 'bigger-runner'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:24.04"
|
base-image: "ubuntu:24.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
@@ -51,7 +51,7 @@
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda-13'
|
tag-suffix: '-gpu-nvidia-cuda-13'
|
||||||
runs-on: 'bigger-runner'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
@@ -61,7 +61,7 @@
|
|||||||
tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
||||||
grpc-base-image: "ubuntu:24.04"
|
grpc-base-image: "ubuntu:24.04"
|
||||||
runs-on: 'bigger-runner'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
- build-type: 'sycl'
|
- build-type: 'sycl'
|
||||||
@@ -70,14 +70,14 @@
|
|||||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||||
grpc-base-image: "ubuntu:24.04"
|
grpc-base-image: "ubuntu:24.04"
|
||||||
tag-suffix: 'sycl'
|
tag-suffix: 'sycl'
|
||||||
runs-on: 'bigger-runner'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-core'
|
tag-suffix: '-vulkan-core'
|
||||||
runs-on: 'bigger-runner'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:24.04"
|
base-image: "ubuntu:24.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
ubuntu-version: '2404'
|
ubuntu-version: '2404'
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=785a71008573e2d84728fb0ba9e851d72d3f8fab
|
LLAMA_VERSION?=959ecf7f234dc0bc0cd6829b25cb0ee1481aa78a
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=7010bb4dff7bd55b03d35ef9772142c21699eba9
|
STABLEDIFFUSION_GGML_VERSION?=a48b4a3ade9972faf0adcad47e51c6fc03f0e46d
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=2eeeba56e9edd762b4b38467bab96c2517163158
|
WHISPER_CPP_VERSION?=f53dc74843e97f19f94a79241357f74ad5b691a6
|
||||||
SO_TARGET?=libgowhisper.so
|
SO_TARGET?=libgowhisper.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|||||||
@@ -537,18 +537,14 @@
|
|||||||
default: "cpu-neutts"
|
default: "cpu-neutts"
|
||||||
nvidia: "cuda12-neutts"
|
nvidia: "cuda12-neutts"
|
||||||
amd: "rocm-neutts"
|
amd: "rocm-neutts"
|
||||||
nvidia-l4t: "nvidia-l4t-neutts"
|
|
||||||
nvidia-cuda-12: "cuda12-neutts"
|
nvidia-cuda-12: "cuda12-neutts"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts"
|
|
||||||
- !!merge <<: *neutts
|
- !!merge <<: *neutts
|
||||||
name: "neutts-development"
|
name: "neutts-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
default: "cpu-neutts-development"
|
default: "cpu-neutts-development"
|
||||||
nvidia: "cuda12-neutts-development"
|
nvidia: "cuda12-neutts-development"
|
||||||
amd: "rocm-neutts-development"
|
amd: "rocm-neutts-development"
|
||||||
nvidia-l4t: "nvidia-l4t-neutts-development"
|
|
||||||
nvidia-cuda-12: "cuda12-neutts-development"
|
nvidia-cuda-12: "cuda12-neutts-development"
|
||||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-neutts-development"
|
|
||||||
- !!merge <<: *llamacpp
|
- !!merge <<: *llamacpp
|
||||||
name: "llama-cpp-development"
|
name: "llama-cpp-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
@@ -578,11 +574,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-neutts"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-neutts"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:latest-gpu-rocm-hipblas-neutts
|
- localai/localai-backends:latest-gpu-rocm-hipblas-neutts
|
||||||
- !!merge <<: *neutts
|
|
||||||
name: "nvidia-l4t-arm64-neutts"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-neutts"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:latest-nvidia-l4t-arm64-neutts
|
|
||||||
- !!merge <<: *neutts
|
- !!merge <<: *neutts
|
||||||
name: "cpu-neutts-development"
|
name: "cpu-neutts-development"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-neutts"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-neutts"
|
||||||
@@ -598,11 +589,6 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-neutts"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-neutts"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-rocm-hipblas-neutts
|
- localai/localai-backends:master-gpu-rocm-hipblas-neutts
|
||||||
- !!merge <<: *neutts
|
|
||||||
name: "nvidia-l4t-arm64-neutts-development"
|
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-neutts"
|
|
||||||
mirrors:
|
|
||||||
- localai/localai-backends:master-nvidia-l4t-arm64-neutts
|
|
||||||
- !!merge <<: *mlx
|
- !!merge <<: *mlx
|
||||||
name: "mlx-development"
|
name: "mlx-development"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ type RunCMD struct {
|
|||||||
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
|
||||||
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
|
||||||
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
|
||||||
|
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
|
||||||
|
|
||||||
Version bool
|
Version bool
|
||||||
}
|
}
|
||||||
@@ -249,6 +250,15 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
opts = append(opts, config.WithLRUEvictionRetryInterval(dur))
|
opts = append(opts, config.WithLRUEvictionRetryInterval(dur))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle Open Responses store TTL
|
||||||
|
if r.OpenResponsesStoreTTL != "" && r.OpenResponsesStoreTTL != "0" {
|
||||||
|
dur, err := time.ParseDuration(r.OpenResponsesStoreTTL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid Open Responses store TTL: %w", err)
|
||||||
|
}
|
||||||
|
opts = append(opts, config.WithOpenResponsesStoreTTL(dur))
|
||||||
|
}
|
||||||
|
|
||||||
// split ":" to get backend name and the uri
|
// split ":" to get backend name and the uri
|
||||||
for _, v := range r.ExternalGRPCBackends {
|
for _, v := range r.ExternalGRPCBackends {
|
||||||
backend := v[:strings.IndexByte(v, ':')]
|
backend := v[:strings.IndexByte(v, ':')]
|
||||||
|
|||||||
@@ -86,6 +86,8 @@ type ApplicationConfig struct {
|
|||||||
|
|
||||||
AgentJobRetentionDays int // Default: 30 days
|
AgentJobRetentionDays int // Default: 30 days
|
||||||
|
|
||||||
|
OpenResponsesStoreTTL time.Duration // TTL for Open Responses store (0 = no expiration)
|
||||||
|
|
||||||
PathWithoutAuth []string
|
PathWithoutAuth []string
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -467,6 +469,12 @@ func WithAgentJobRetentionDays(days int) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithOpenResponsesStoreTTL(ttl time.Duration) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.OpenResponsesStoreTTL = ttl
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func WithEnforcedPredownloadScans(enforced bool) AppOption {
|
func WithEnforcedPredownloadScans(enforced bool) AppOption {
|
||||||
return func(o *ApplicationConfig) {
|
return func(o *ApplicationConfig) {
|
||||||
o.EnforcePredownloadScans = enforced
|
o.EnforcePredownloadScans = enforced
|
||||||
@@ -594,6 +602,12 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
|||||||
} else {
|
} else {
|
||||||
lruEvictionRetryInterval = "1s" // default
|
lruEvictionRetryInterval = "1s" // default
|
||||||
}
|
}
|
||||||
|
var openResponsesStoreTTL string
|
||||||
|
if o.OpenResponsesStoreTTL > 0 {
|
||||||
|
openResponsesStoreTTL = o.OpenResponsesStoreTTL.String()
|
||||||
|
} else {
|
||||||
|
openResponsesStoreTTL = "0" // default: no expiration
|
||||||
|
}
|
||||||
|
|
||||||
return RuntimeSettings{
|
return RuntimeSettings{
|
||||||
WatchdogEnabled: &watchdogEnabled,
|
WatchdogEnabled: &watchdogEnabled,
|
||||||
@@ -628,6 +642,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
|
|||||||
AutoloadBackendGalleries: &autoloadBackendGalleries,
|
AutoloadBackendGalleries: &autoloadBackendGalleries,
|
||||||
ApiKeys: &apiKeys,
|
ApiKeys: &apiKeys,
|
||||||
AgentJobRetentionDays: &agentJobRetentionDays,
|
AgentJobRetentionDays: &agentJobRetentionDays,
|
||||||
|
OpenResponsesStoreTTL: &openResponsesStoreTTL,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -769,6 +784,14 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
|
|||||||
if settings.AgentJobRetentionDays != nil {
|
if settings.AgentJobRetentionDays != nil {
|
||||||
o.AgentJobRetentionDays = *settings.AgentJobRetentionDays
|
o.AgentJobRetentionDays = *settings.AgentJobRetentionDays
|
||||||
}
|
}
|
||||||
|
if settings.OpenResponsesStoreTTL != nil {
|
||||||
|
if *settings.OpenResponsesStoreTTL == "0" || *settings.OpenResponsesStoreTTL == "" {
|
||||||
|
o.OpenResponsesStoreTTL = 0 // No expiration
|
||||||
|
} else if dur, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err == nil {
|
||||||
|
o.OpenResponsesStoreTTL = dur
|
||||||
|
}
|
||||||
|
// This setting doesn't require restart, can be updated dynamically
|
||||||
|
}
|
||||||
// Note: ApiKeys requires special handling (merging with startup keys) - handled in caller
|
// Note: ApiKeys requires special handling (merging with startup keys) - handled in caller
|
||||||
|
|
||||||
return requireRestart
|
return requireRestart
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/downloader"
|
"github.com/mudler/LocalAI/pkg/downloader"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
|
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||||
"github.com/mudler/cogito"
|
"github.com/mudler/cogito"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
@@ -51,6 +52,7 @@ type ModelConfig struct {
|
|||||||
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
|
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
|
||||||
|
|
||||||
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
|
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
|
||||||
|
ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
|
||||||
|
|
||||||
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||||
|
|||||||
@@ -60,4 +60,7 @@ type RuntimeSettings struct {
|
|||||||
|
|
||||||
// Agent settings
|
// Agent settings
|
||||||
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
AgentJobRetentionDays *int `json:"agent_job_retention_days,omitempty"`
|
||||||
|
|
||||||
|
// Open Responses settings
|
||||||
|
OpenResponsesStoreTTL *string `json:"open_responses_store_ttl,omitempty"` // TTL for stored responses (e.g., "1h", "30m", "0" = no expiration)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -193,6 +193,8 @@ func API(application *application.Application) (*echo.Echo, error) {
|
|||||||
corsConfig.AllowOrigins = strings.Split(application.ApplicationConfig().CORSAllowOrigins, ",")
|
corsConfig.AllowOrigins = strings.Split(application.ApplicationConfig().CORSAllowOrigins, ",")
|
||||||
}
|
}
|
||||||
e.Use(middleware.CORSWithConfig(corsConfig))
|
e.Use(middleware.CORSWithConfig(corsConfig))
|
||||||
|
} else {
|
||||||
|
e.Use(middleware.CORS())
|
||||||
}
|
}
|
||||||
|
|
||||||
// CSRF middleware
|
// CSRF middleware
|
||||||
@@ -214,6 +216,7 @@ func API(application *application.Application) (*echo.Echo, error) {
|
|||||||
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator(), application)
|
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator(), application)
|
||||||
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
|
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
|
||||||
routes.RegisterAnthropicRoutes(e, requestExtractor, application)
|
routes.RegisterAnthropicRoutes(e, requestExtractor, application)
|
||||||
|
routes.RegisterOpenResponsesRoutes(e, requestExtractor, application)
|
||||||
if !application.ApplicationConfig().DisableWebUI {
|
if !application.ApplicationConfig().DisableWebUI {
|
||||||
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application)
|
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application)
|
||||||
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/application"
|
"github.com/mudler/LocalAI/core/application"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/endpoints/openresponses"
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/xlog"
|
"github.com/mudler/xlog"
|
||||||
@@ -84,6 +85,16 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if settings.OpenResponsesStoreTTL != nil {
|
||||||
|
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
|
||||||
|
if _, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err != nil {
|
||||||
|
return c.JSON(http.StatusBadRequest, schema.SettingsResponse{
|
||||||
|
Success: false,
|
||||||
|
Error: "Invalid open_responses_store_ttl format: " + err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Save to file
|
// Save to file
|
||||||
if appConfig.DynamicConfigsDir == "" {
|
if appConfig.DynamicConfigsDir == "" {
|
||||||
@@ -144,6 +155,22 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
|||||||
xlog.Info("Updated LRU eviction retry settings", "maxRetries", maxRetries, "retryInterval", retryInterval)
|
xlog.Info("Updated LRU eviction retry settings", "maxRetries", maxRetries, "retryInterval", retryInterval)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update Open Responses store TTL dynamically
|
||||||
|
if settings.OpenResponsesStoreTTL != nil {
|
||||||
|
ttl := time.Duration(0)
|
||||||
|
if *settings.OpenResponsesStoreTTL != "0" && *settings.OpenResponsesStoreTTL != "" {
|
||||||
|
if dur, err := time.ParseDuration(*settings.OpenResponsesStoreTTL); err == nil {
|
||||||
|
ttl = dur
|
||||||
|
} else {
|
||||||
|
xlog.Warn("Invalid Open Responses store TTL format", "ttl", *settings.OpenResponsesStoreTTL, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Import the store package
|
||||||
|
store := openresponses.GetGlobalStore()
|
||||||
|
store.SetTTL(ttl)
|
||||||
|
xlog.Info("Updated Open Responses store TTL", "ttl", ttl)
|
||||||
|
}
|
||||||
|
|
||||||
// Check if agent job retention changed
|
// Check if agent job retention changed
|
||||||
agentJobChanged := settings.AgentJobRetentionDays != nil
|
agentJobChanged := settings.AgentJobRetentionDays != nil
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
|
"github.com/mudler/LocalAI/pkg/reasoning"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/templates"
|
"github.com/mudler/LocalAI/core/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
lastEmittedReasoning := ""
|
lastEmittedReasoning := ""
|
||||||
lastEmittedCleanedContent := ""
|
lastEmittedCleanedContent := ""
|
||||||
|
|
||||||
|
// Configure reasoning extraction options
|
||||||
|
// Auto-detect if prompt ends with thinking tag
|
||||||
|
// or use explicit config setting
|
||||||
|
thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
|
||||||
|
|
||||||
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||||
accumulatedContent += s
|
accumulatedContent += s
|
||||||
// Extract reasoning from accumulated content
|
// Extract reasoning from accumulated content
|
||||||
currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
|
opts := []reasoning.Option{}
|
||||||
|
if thinkingForcedOpen {
|
||||||
|
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||||
|
}
|
||||||
|
currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
|
||||||
|
|
||||||
// Calculate new reasoning delta (what we haven't emitted yet)
|
// Calculate new reasoning delta (what we haven't emitted yet)
|
||||||
var reasoningDelta *string
|
var reasoningDelta *string
|
||||||
@@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Extract reasoning before processing tool calls
|
// Extract reasoning before processing tool calls
|
||||||
reasoning, cleanedResult := functions.ExtractReasoning(result)
|
// Auto-detect if prompt ends with thinking tag or use explicit config
|
||||||
|
toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
|
||||||
|
opts := []reasoning.Option{}
|
||||||
|
if toolsThinkingForcedOpen {
|
||||||
|
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||||
|
}
|
||||||
|
extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
|
||||||
result = cleanedResult
|
result = cleanedResult
|
||||||
|
|
||||||
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
|
||||||
@@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
}
|
}
|
||||||
|
|
||||||
var deltaReasoning *string
|
var deltaReasoning *string
|
||||||
if reasoning != "" {
|
if extractedReasoning != "" {
|
||||||
deltaReasoning = &reasoning
|
deltaReasoning = &extractedReasoning
|
||||||
}
|
}
|
||||||
delta := &schema.Message{Content: &result}
|
delta := &schema.Message{Content: &result}
|
||||||
if deltaReasoning != nil {
|
if deltaReasoning != nil {
|
||||||
@@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
// no streaming mode
|
// no streaming mode
|
||||||
default:
|
default:
|
||||||
|
|
||||||
|
// Auto-detect if prompt ends with thinking tag for non-streaming mode
|
||||||
|
nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
|
||||||
|
|
||||||
tokenCallback := func(s string, c *[]schema.Choice) {
|
tokenCallback := func(s string, c *[]schema.Choice) {
|
||||||
// Extract reasoning from the response
|
// Extract reasoning from the response
|
||||||
reasoning, cleanedS := functions.ExtractReasoning(s)
|
var extractedReasoning string
|
||||||
s = cleanedS
|
opts := []reasoning.Option{}
|
||||||
|
if nonStreamThinkingForcedOpen {
|
||||||
|
opts = append(opts, reasoning.WithThinkingForcedOpen())
|
||||||
|
}
|
||||||
|
extractedReasoning, s = reasoning.Extract(s, opts...)
|
||||||
|
|
||||||
if !shouldUseFn {
|
if !shouldUseFn {
|
||||||
// no function is called, just reply and use stop as finish reason
|
// no function is called, just reply and use stop as finish reason
|
||||||
stopReason := FinishReasonStop
|
stopReason := FinishReasonStop
|
||||||
message := &schema.Message{Role: "assistant", Content: &s}
|
message := &schema.Message{Role: "assistant", Content: &s}
|
||||||
if reasoning != "" {
|
if extractedReasoning != "" {
|
||||||
message.Reasoning = &reasoning
|
message.Reasoning = &extractedReasoning
|
||||||
}
|
}
|
||||||
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
|
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
|
||||||
return
|
return
|
||||||
@@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
|
|
||||||
stopReason := FinishReasonStop
|
stopReason := FinishReasonStop
|
||||||
message := &schema.Message{Role: "assistant", Content: &result}
|
message := &schema.Message{Role: "assistant", Content: &result}
|
||||||
if reasoning != "" {
|
if extractedReasoning != "" {
|
||||||
message.Reasoning = &reasoning
|
message.Reasoning = &extractedReasoning
|
||||||
}
|
}
|
||||||
*c = append(*c, schema.Choice{
|
*c = append(*c, schema.Choice{
|
||||||
FinishReason: &stopReason,
|
FinishReason: &stopReason,
|
||||||
@@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
Role: "assistant",
|
Role: "assistant",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if reasoning != "" {
|
if extractedReasoning != "" {
|
||||||
toolChoice.Message.Reasoning = &reasoning
|
toolChoice.Message.Reasoning = &extractedReasoning
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, ss := range results {
|
for _, ss := range results {
|
||||||
@@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
|
|||||||
"arguments": args,
|
"arguments": args,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if reasoning != "" {
|
if extractedReasoning != "" {
|
||||||
message.Reasoning = &reasoning
|
message.Reasoning = &extractedReasoning
|
||||||
}
|
}
|
||||||
*c = append(*c, schema.Choice{
|
*c = append(*c, schema.Choice{
|
||||||
FinishReason: &functionCallReason,
|
FinishReason: &functionCallReason,
|
||||||
|
|||||||
3301
core/http/endpoints/openresponses/responses.go
Normal file
3301
core/http/endpoints/openresponses/responses.go
Normal file
File diff suppressed because it is too large
Load Diff
453
core/http/endpoints/openresponses/store.go
Normal file
453
core/http/endpoints/openresponses/store.go
Normal file
@@ -0,0 +1,453 @@
|
|||||||
|
package openresponses
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ResponseStore provides thread-safe storage for Open Responses API responses
|
||||||
|
type ResponseStore struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
responses map[string]*StoredResponse
|
||||||
|
ttl time.Duration // Time-to-live for stored responses (0 = no expiration)
|
||||||
|
cleanupCtx context.Context
|
||||||
|
cleanupCancel context.CancelFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// StreamedEvent represents a buffered SSE event for streaming resume
|
||||||
|
type StreamedEvent struct {
|
||||||
|
SequenceNumber int `json:"sequence_number"`
|
||||||
|
EventType string `json:"event_type"`
|
||||||
|
Data []byte `json:"data"` // JSON-serialized event
|
||||||
|
}
|
||||||
|
|
||||||
|
// StoredResponse contains a complete response with its input request and output items
|
||||||
|
type StoredResponse struct {
|
||||||
|
Request *schema.OpenResponsesRequest
|
||||||
|
Response *schema.ORResponseResource
|
||||||
|
Items map[string]*schema.ORItemField // item_id -> item mapping for quick lookup
|
||||||
|
StoredAt time.Time
|
||||||
|
ExpiresAt *time.Time // nil if no expiration
|
||||||
|
|
||||||
|
// Background execution support
|
||||||
|
CancelFunc context.CancelFunc // For cancellation of background tasks
|
||||||
|
StreamEvents []StreamedEvent // Buffered events for streaming resume
|
||||||
|
StreamEnabled bool // Was created with stream=true
|
||||||
|
IsBackground bool // Was created with background=true
|
||||||
|
EventsChan chan struct{} // Signals new events for live subscribers
|
||||||
|
mu sync.RWMutex // Protect concurrent access to this response
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
globalStore *ResponseStore
|
||||||
|
storeOnce sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetGlobalStore returns the singleton response store instance
|
||||||
|
func GetGlobalStore() *ResponseStore {
|
||||||
|
storeOnce.Do(func() {
|
||||||
|
globalStore = NewResponseStore(0) // Default: no TTL, will be updated from appConfig
|
||||||
|
})
|
||||||
|
return globalStore
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetTTL updates the TTL for the store
|
||||||
|
// This will affect all new responses stored after this call
|
||||||
|
func (s *ResponseStore) SetTTL(ttl time.Duration) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
// Stop existing cleanup loop if running
|
||||||
|
if s.cleanupCancel != nil {
|
||||||
|
s.cleanupCancel()
|
||||||
|
s.cleanupCancel = nil
|
||||||
|
s.cleanupCtx = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
s.ttl = ttl
|
||||||
|
|
||||||
|
// If TTL > 0, start cleanup loop
|
||||||
|
if ttl > 0 {
|
||||||
|
s.cleanupCtx, s.cleanupCancel = context.WithCancel(context.Background())
|
||||||
|
go s.cleanupLoop(s.cleanupCtx)
|
||||||
|
}
|
||||||
|
|
||||||
|
xlog.Debug("Updated Open Responses store TTL", "ttl", ttl, "cleanup_running", ttl > 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewResponseStore creates a new response store with optional TTL
|
||||||
|
// If ttl is 0, responses are stored indefinitely
|
||||||
|
func NewResponseStore(ttl time.Duration) *ResponseStore {
|
||||||
|
store := &ResponseStore{
|
||||||
|
responses: make(map[string]*StoredResponse),
|
||||||
|
ttl: ttl,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start cleanup goroutine if TTL is set
|
||||||
|
if ttl > 0 {
|
||||||
|
store.cleanupCtx, store.cleanupCancel = context.WithCancel(context.Background())
|
||||||
|
go store.cleanupLoop(store.cleanupCtx)
|
||||||
|
}
|
||||||
|
|
||||||
|
return store
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store stores a response with its request and items
|
||||||
|
func (s *ResponseStore) Store(responseID string, request *schema.OpenResponsesRequest, response *schema.ORResponseResource) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
// Build item index for quick lookup
|
||||||
|
items := make(map[string]*schema.ORItemField)
|
||||||
|
for i := range response.Output {
|
||||||
|
item := &response.Output[i]
|
||||||
|
if item.ID != "" {
|
||||||
|
items[item.ID] = item
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stored := &StoredResponse{
|
||||||
|
Request: request,
|
||||||
|
Response: response,
|
||||||
|
Items: items,
|
||||||
|
StoredAt: time.Now(),
|
||||||
|
ExpiresAt: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set expiration if TTL is configured
|
||||||
|
if s.ttl > 0 {
|
||||||
|
expiresAt := time.Now().Add(s.ttl)
|
||||||
|
stored.ExpiresAt = &expiresAt
|
||||||
|
}
|
||||||
|
|
||||||
|
s.responses[responseID] = stored
|
||||||
|
xlog.Debug("Stored Open Responses response", "response_id", responseID, "items_count", len(items))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get retrieves a stored response by ID
|
||||||
|
func (s *ResponseStore) Get(responseID string) (*StoredResponse, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check expiration
|
||||||
|
if stored.ExpiresAt != nil && time.Now().After(*stored.ExpiresAt) {
|
||||||
|
// Expired, but we'll return it anyway and let caller handle cleanup
|
||||||
|
return nil, fmt.Errorf("response expired: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stored, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetItem retrieves a specific item from a stored response
|
||||||
|
func (s *ResponseStore) GetItem(responseID, itemID string) (*schema.ORItemField, error) {
|
||||||
|
stored, err := s.Get(responseID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
item, exists := stored.Items[itemID]
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("item not found: %s in response %s", itemID, responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return item, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindItem searches for an item across all stored responses
|
||||||
|
// Returns the item and the response ID it was found in
|
||||||
|
func (s *ResponseStore) FindItem(itemID string) (*schema.ORItemField, string, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
for responseID, stored := range s.responses {
|
||||||
|
// Skip expired responses
|
||||||
|
if stored.ExpiresAt != nil && now.After(*stored.ExpiresAt) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if item, exists := stored.Items[itemID]; exists {
|
||||||
|
return item, responseID, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, "", fmt.Errorf("item not found in any stored response: %s", itemID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes a response from storage
|
||||||
|
func (s *ResponseStore) Delete(responseID string) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
delete(s.responses, responseID)
|
||||||
|
xlog.Debug("Deleted Open Responses response", "response_id", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup removes expired responses
|
||||||
|
func (s *ResponseStore) Cleanup() int {
|
||||||
|
if s.ttl == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
count := 0
|
||||||
|
for id, stored := range s.responses {
|
||||||
|
if stored.ExpiresAt != nil && now.After(*stored.ExpiresAt) {
|
||||||
|
delete(s.responses, id)
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count > 0 {
|
||||||
|
xlog.Debug("Cleaned up expired Open Responses", "count", count)
|
||||||
|
}
|
||||||
|
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanupLoop runs periodic cleanup of expired responses
|
||||||
|
func (s *ResponseStore) cleanupLoop(ctx context.Context) {
|
||||||
|
if s.ttl == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(s.ttl / 2) // Cleanup at half TTL interval
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
xlog.Debug("Stopped Open Responses store cleanup loop")
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
s.Cleanup()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count returns the number of stored responses
|
||||||
|
func (s *ResponseStore) Count() int {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
return len(s.responses)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StoreBackground stores a background response with cancel function and optional streaming support
|
||||||
|
func (s *ResponseStore) StoreBackground(responseID string, request *schema.OpenResponsesRequest, response *schema.ORResponseResource, cancelFunc context.CancelFunc, streamEnabled bool) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
// Build item index for quick lookup
|
||||||
|
items := make(map[string]*schema.ORItemField)
|
||||||
|
for i := range response.Output {
|
||||||
|
item := &response.Output[i]
|
||||||
|
if item.ID != "" {
|
||||||
|
items[item.ID] = item
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stored := &StoredResponse{
|
||||||
|
Request: request,
|
||||||
|
Response: response,
|
||||||
|
Items: items,
|
||||||
|
StoredAt: time.Now(),
|
||||||
|
ExpiresAt: nil,
|
||||||
|
CancelFunc: cancelFunc,
|
||||||
|
StreamEvents: []StreamedEvent{},
|
||||||
|
StreamEnabled: streamEnabled,
|
||||||
|
IsBackground: true,
|
||||||
|
EventsChan: make(chan struct{}, 100), // Buffered channel for event notifications
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set expiration if TTL is configured
|
||||||
|
if s.ttl > 0 {
|
||||||
|
expiresAt := time.Now().Add(s.ttl)
|
||||||
|
stored.ExpiresAt = &expiresAt
|
||||||
|
}
|
||||||
|
|
||||||
|
s.responses[responseID] = stored
|
||||||
|
xlog.Debug("Stored background Open Responses response", "response_id", responseID, "stream_enabled", streamEnabled)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateStatus updates the status of a stored response
|
||||||
|
func (s *ResponseStore) UpdateStatus(responseID string, status string, completedAt *int64) error {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.Lock()
|
||||||
|
defer stored.mu.Unlock()
|
||||||
|
|
||||||
|
stored.Response.Status = status
|
||||||
|
stored.Response.CompletedAt = completedAt
|
||||||
|
|
||||||
|
xlog.Debug("Updated response status", "response_id", responseID, "status", status)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateResponse updates the entire response object for a stored response
|
||||||
|
func (s *ResponseStore) UpdateResponse(responseID string, response *schema.ORResponseResource) error {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.Lock()
|
||||||
|
defer stored.mu.Unlock()
|
||||||
|
|
||||||
|
// Rebuild item index
|
||||||
|
items := make(map[string]*schema.ORItemField)
|
||||||
|
for i := range response.Output {
|
||||||
|
item := &response.Output[i]
|
||||||
|
if item.ID != "" {
|
||||||
|
items[item.ID] = item
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.Response = response
|
||||||
|
stored.Items = items
|
||||||
|
|
||||||
|
xlog.Debug("Updated response", "response_id", responseID, "status", response.Status, "items_count", len(items))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AppendEvent appends a streaming event to the buffer for resume support
|
||||||
|
func (s *ResponseStore) AppendEvent(responseID string, event *schema.ORStreamEvent) error {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize the event
|
||||||
|
data, err := json.Marshal(event)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal event: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.Lock()
|
||||||
|
stored.StreamEvents = append(stored.StreamEvents, StreamedEvent{
|
||||||
|
SequenceNumber: event.SequenceNumber,
|
||||||
|
EventType: event.Type,
|
||||||
|
Data: data,
|
||||||
|
})
|
||||||
|
stored.mu.Unlock()
|
||||||
|
|
||||||
|
// Notify any subscribers of new event
|
||||||
|
select {
|
||||||
|
case stored.EventsChan <- struct{}{}:
|
||||||
|
default:
|
||||||
|
// Channel full, subscribers will catch up
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetEventsAfter returns all events with sequence number greater than startingAfter
|
||||||
|
func (s *ResponseStore) GetEventsAfter(responseID string, startingAfter int) ([]StreamedEvent, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.RLock()
|
||||||
|
defer stored.mu.RUnlock()
|
||||||
|
|
||||||
|
var result []StreamedEvent
|
||||||
|
for _, event := range stored.StreamEvents {
|
||||||
|
if event.SequenceNumber > startingAfter {
|
||||||
|
result = append(result, event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cancel cancels a background response if it's still in progress
|
||||||
|
func (s *ResponseStore) Cancel(responseID string) (*schema.ORResponseResource, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.Lock()
|
||||||
|
defer stored.mu.Unlock()
|
||||||
|
|
||||||
|
// If already in a terminal state, just return the response (idempotent)
|
||||||
|
status := stored.Response.Status
|
||||||
|
if status == schema.ORStatusCompleted || status == schema.ORStatusFailed ||
|
||||||
|
status == schema.ORStatusIncomplete || status == schema.ORStatusCancelled {
|
||||||
|
xlog.Debug("Response already in terminal state", "response_id", responseID, "status", status)
|
||||||
|
return stored.Response, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cancel the context if available
|
||||||
|
if stored.CancelFunc != nil {
|
||||||
|
stored.CancelFunc()
|
||||||
|
xlog.Debug("Cancelled background response", "response_id", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update status to cancelled
|
||||||
|
now := time.Now().Unix()
|
||||||
|
stored.Response.Status = schema.ORStatusCancelled
|
||||||
|
stored.Response.CompletedAt = &now
|
||||||
|
|
||||||
|
return stored.Response, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetEventsChan returns the events notification channel for a response
|
||||||
|
func (s *ResponseStore) GetEventsChan(responseID string) (chan struct{}, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stored.EventsChan, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsStreamEnabled checks if a response was created with streaming enabled
|
||||||
|
func (s *ResponseStore) IsStreamEnabled(responseID string) (bool, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
stored, exists := s.responses[responseID]
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return false, fmt.Errorf("response not found: %s", responseID)
|
||||||
|
}
|
||||||
|
|
||||||
|
stored.mu.RLock()
|
||||||
|
defer stored.mu.RUnlock()
|
||||||
|
|
||||||
|
return stored.StreamEnabled, nil
|
||||||
|
}
|
||||||
13
core/http/endpoints/openresponses/store_suite_test.go
Normal file
13
core/http/endpoints/openresponses/store_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package openresponses
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestStore(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "ResponseStore Suite")
|
||||||
|
}
|
||||||
626
core/http/endpoints/openresponses/store_test.go
Normal file
626
core/http/endpoints/openresponses/store_test.go
Normal file
@@ -0,0 +1,626 @@
|
|||||||
|
package openresponses
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("ResponseStore", func() {
|
||||||
|
var store *ResponseStore
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
store = NewResponseStore(0) // No TTL for most tests
|
||||||
|
})
|
||||||
|
|
||||||
|
AfterEach(func() {
|
||||||
|
// Clean up
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Store and Get", func() {
|
||||||
|
It("should store and retrieve a response", func() {
|
||||||
|
responseID := "resp_test123"
|
||||||
|
request := &schema.OpenResponsesRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Input: "Hello",
|
||||||
|
}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
CreatedAt: time.Now().Unix(),
|
||||||
|
Status: "completed",
|
||||||
|
Model: "test-model",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "msg_123",
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
Content: []schema.ORContentPart{{
|
||||||
|
Type: "output_text",
|
||||||
|
Text: "Hello, world!",
|
||||||
|
Annotations: []schema.ORAnnotation{},
|
||||||
|
Logprobs: []schema.ORLogProb{},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored).ToNot(BeNil())
|
||||||
|
Expect(stored.Response.ID).To(Equal(responseID))
|
||||||
|
Expect(stored.Request.Model).To(Equal("test-model"))
|
||||||
|
Expect(len(stored.Items)).To(Equal(1))
|
||||||
|
Expect(stored.Items["msg_123"]).ToNot(BeNil())
|
||||||
|
Expect(stored.Items["msg_123"].ID).To(Equal("msg_123"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should return error for non-existent response", func() {
|
||||||
|
_, err := store.Get("nonexistent")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("not found"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should index all items by ID", func() {
|
||||||
|
responseID := "resp_test456"
|
||||||
|
request := &schema.OpenResponsesRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Input: "Test",
|
||||||
|
}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "msg_1",
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "function_call",
|
||||||
|
ID: "fc_1",
|
||||||
|
Status: "completed",
|
||||||
|
CallID: "fc_1",
|
||||||
|
Name: "test_function",
|
||||||
|
Arguments: `{"arg": "value"}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "msg_2",
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(stored.Items)).To(Equal(3))
|
||||||
|
Expect(stored.Items["msg_1"]).ToNot(BeNil())
|
||||||
|
Expect(stored.Items["fc_1"]).ToNot(BeNil())
|
||||||
|
Expect(stored.Items["msg_2"]).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle items without IDs", func() {
|
||||||
|
responseID := "resp_test789"
|
||||||
|
request := &schema.OpenResponsesRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Input: "Test",
|
||||||
|
}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "", // No ID
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "msg_with_id",
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
// Only items with IDs are indexed
|
||||||
|
Expect(len(stored.Items)).To(Equal(1))
|
||||||
|
Expect(stored.Items["msg_with_id"]).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("GetItem", func() {
|
||||||
|
It("should retrieve a specific item by ID", func() {
|
||||||
|
responseID := "resp_item_test"
|
||||||
|
itemID := "msg_specific"
|
||||||
|
request := &schema.OpenResponsesRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Input: "Test",
|
||||||
|
}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: itemID,
|
||||||
|
Status: "completed",
|
||||||
|
Role: "assistant",
|
||||||
|
Content: []schema.ORContentPart{{
|
||||||
|
Type: "output_text",
|
||||||
|
Text: "Specific message",
|
||||||
|
Annotations: []schema.ORAnnotation{},
|
||||||
|
Logprobs: []schema.ORLogProb{},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
item, err := store.GetItem(responseID, itemID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(item).ToNot(BeNil())
|
||||||
|
Expect(item.ID).To(Equal(itemID))
|
||||||
|
Expect(item.Type).To(Equal("message"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should return error for non-existent item", func() {
|
||||||
|
responseID := "resp_item_test2"
|
||||||
|
request := &schema.OpenResponsesRequest{
|
||||||
|
Model: "test-model",
|
||||||
|
Input: "Test",
|
||||||
|
}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{
|
||||||
|
Type: "message",
|
||||||
|
ID: "msg_existing",
|
||||||
|
Status: "completed",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
_, err := store.GetItem(responseID, "nonexistent_item")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("item not found"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should return error for non-existent response when getting item", func() {
|
||||||
|
_, err := store.GetItem("nonexistent_response", "any_item")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("response not found"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("FindItem", func() {
|
||||||
|
It("should find an item across all stored responses", func() {
|
||||||
|
// Store first response
|
||||||
|
responseID1 := "resp_find_1"
|
||||||
|
itemID1 := "msg_find_1"
|
||||||
|
store.Store(responseID1, &schema.OpenResponsesRequest{Model: "test"}, &schema.ORResponseResource{
|
||||||
|
ID: responseID1,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{Type: "message", ID: itemID1, Status: "completed"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// Store second response
|
||||||
|
responseID2 := "resp_find_2"
|
||||||
|
itemID2 := "msg_find_2"
|
||||||
|
store.Store(responseID2, &schema.OpenResponsesRequest{Model: "test"}, &schema.ORResponseResource{
|
||||||
|
ID: responseID2,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{Type: "message", ID: itemID2, Status: "completed"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// Find item from first response
|
||||||
|
item, foundResponseID, err := store.FindItem(itemID1)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(item).ToNot(BeNil())
|
||||||
|
Expect(item.ID).To(Equal(itemID1))
|
||||||
|
Expect(foundResponseID).To(Equal(responseID1))
|
||||||
|
|
||||||
|
// Find item from second response
|
||||||
|
item, foundResponseID, err = store.FindItem(itemID2)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(item).ToNot(BeNil())
|
||||||
|
Expect(item.ID).To(Equal(itemID2))
|
||||||
|
Expect(foundResponseID).To(Equal(responseID2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should return error when item not found in any response", func() {
|
||||||
|
_, _, err := store.FindItem("nonexistent_item")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
Expect(err.Error()).To(ContainSubstring("item not found in any stored response"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Delete", func() {
|
||||||
|
It("should delete a stored response", func() {
|
||||||
|
responseID := "resp_delete_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
Expect(store.Count()).To(Equal(1))
|
||||||
|
|
||||||
|
store.Delete(responseID)
|
||||||
|
Expect(store.Count()).To(Equal(0))
|
||||||
|
|
||||||
|
_, err := store.Get(responseID)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle deleting non-existent response gracefully", func() {
|
||||||
|
// Should not panic
|
||||||
|
store.Delete("nonexistent")
|
||||||
|
Expect(store.Count()).To(Equal(0))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Count", func() {
|
||||||
|
It("should return correct count of stored responses", func() {
|
||||||
|
Expect(store.Count()).To(Equal(0))
|
||||||
|
|
||||||
|
store.Store("resp_1", &schema.OpenResponsesRequest{Model: "test"}, &schema.ORResponseResource{ID: "resp_1", Object: "response"})
|
||||||
|
Expect(store.Count()).To(Equal(1))
|
||||||
|
|
||||||
|
store.Store("resp_2", &schema.OpenResponsesRequest{Model: "test"}, &schema.ORResponseResource{ID: "resp_2", Object: "response"})
|
||||||
|
Expect(store.Count()).To(Equal(2))
|
||||||
|
|
||||||
|
store.Delete("resp_1")
|
||||||
|
Expect(store.Count()).To(Equal(1))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("TTL and Expiration", func() {
|
||||||
|
It("should set expiration when TTL is configured", func() {
|
||||||
|
ttlStore := NewResponseStore(100 * time.Millisecond)
|
||||||
|
responseID := "resp_ttl_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response"}
|
||||||
|
|
||||||
|
ttlStore.Store(responseID, request, response)
|
||||||
|
|
||||||
|
stored, err := ttlStore.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored.ExpiresAt).ToNot(BeNil())
|
||||||
|
Expect(stored.ExpiresAt.After(time.Now())).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should not set expiration when TTL is 0", func() {
|
||||||
|
responseID := "resp_no_ttl"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response"}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored.ExpiresAt).To(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should clean up expired responses", func() {
|
||||||
|
ttlStore := NewResponseStore(50 * time.Millisecond)
|
||||||
|
responseID := "resp_expire_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response"}
|
||||||
|
|
||||||
|
ttlStore.Store(responseID, request, response)
|
||||||
|
Expect(ttlStore.Count()).To(Equal(1))
|
||||||
|
|
||||||
|
// Wait for expiration (longer than TTL and cleanup interval)
|
||||||
|
time.Sleep(150 * time.Millisecond)
|
||||||
|
|
||||||
|
// Cleanup should remove expired response (may have already been cleaned by goroutine)
|
||||||
|
count := ttlStore.Cleanup()
|
||||||
|
// Count might be 0 if cleanup goroutine already ran, or 1 if we're first
|
||||||
|
Expect(count).To(BeNumerically(">=", 0))
|
||||||
|
Expect(ttlStore.Count()).To(Equal(0))
|
||||||
|
|
||||||
|
_, err := ttlStore.Get(responseID)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should return error for expired response", func() {
|
||||||
|
ttlStore := NewResponseStore(50 * time.Millisecond)
|
||||||
|
responseID := "resp_expire_error"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response"}
|
||||||
|
|
||||||
|
ttlStore.Store(responseID, request, response)
|
||||||
|
|
||||||
|
// Wait for expiration (but not long enough for cleanup goroutine to remove it)
|
||||||
|
time.Sleep(75 * time.Millisecond)
|
||||||
|
|
||||||
|
// Try to get before cleanup goroutine removes it
|
||||||
|
_, err := ttlStore.Get(responseID)
|
||||||
|
// Error could be "expired" or "not found" (if cleanup already ran)
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
// Either error message is acceptable
|
||||||
|
errMsg := err.Error()
|
||||||
|
Expect(errMsg).To(Or(ContainSubstring("expired"), ContainSubstring("not found")))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Thread Safety", func() {
|
||||||
|
It("should handle concurrent stores and gets", func() {
|
||||||
|
// This is a basic concurrency test
|
||||||
|
done := make(chan bool, 10)
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
go func(id int) {
|
||||||
|
responseID := fmt.Sprintf("resp_concurrent_%d", id)
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Output: []schema.ORItemField{
|
||||||
|
{Type: "message", ID: fmt.Sprintf("msg_%d", id), Status: "completed"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
// Retrieve immediately
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored).ToNot(BeNil())
|
||||||
|
done <- true
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all goroutines
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
|
||||||
|
Expect(store.Count()).To(Equal(10))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("GetGlobalStore", func() {
|
||||||
|
It("should return singleton instance", func() {
|
||||||
|
store1 := GetGlobalStore()
|
||||||
|
store2 := GetGlobalStore()
|
||||||
|
Expect(store1).To(Equal(store2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should persist data across GetGlobalStore calls", func() {
|
||||||
|
globalStore := GetGlobalStore()
|
||||||
|
responseID := "resp_global_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{ID: responseID, Object: "response"}
|
||||||
|
|
||||||
|
globalStore.Store(responseID, request, response)
|
||||||
|
|
||||||
|
// Get store again
|
||||||
|
globalStore2 := GetGlobalStore()
|
||||||
|
stored, err := globalStore2.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Describe("Background Mode Support", func() {
|
||||||
|
It("should store background response with cancel function", func() {
|
||||||
|
responseID := "resp_bg_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusQueued,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, true)
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored).ToNot(BeNil())
|
||||||
|
Expect(stored.IsBackground).To(BeTrue())
|
||||||
|
Expect(stored.StreamEnabled).To(BeTrue())
|
||||||
|
Expect(stored.CancelFunc).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should update status of stored response", func() {
|
||||||
|
responseID := "resp_status_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusQueued,
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
err := store.UpdateStatus(responseID, schema.ORStatusInProgress, nil)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
stored, err := store.Get(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(stored.Response.Status).To(Equal(schema.ORStatusInProgress))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should append and retrieve streaming events", func() {
|
||||||
|
responseID := "resp_events_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusInProgress,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, true)
|
||||||
|
|
||||||
|
// Append events
|
||||||
|
event1 := &schema.ORStreamEvent{
|
||||||
|
Type: "response.created",
|
||||||
|
SequenceNumber: 0,
|
||||||
|
}
|
||||||
|
event2 := &schema.ORStreamEvent{
|
||||||
|
Type: "response.in_progress",
|
||||||
|
SequenceNumber: 1,
|
||||||
|
}
|
||||||
|
event3 := &schema.ORStreamEvent{
|
||||||
|
Type: "response.output_text.delta",
|
||||||
|
SequenceNumber: 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := store.AppendEvent(responseID, event1)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
err = store.AppendEvent(responseID, event2)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
err = store.AppendEvent(responseID, event3)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
// Get all events after -1 (all events)
|
||||||
|
events, err := store.GetEventsAfter(responseID, -1)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(events).To(HaveLen(3))
|
||||||
|
|
||||||
|
// Get events after sequence 1
|
||||||
|
events, err = store.GetEventsAfter(responseID, 1)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(events).To(HaveLen(1))
|
||||||
|
Expect(events[0].SequenceNumber).To(Equal(2))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should cancel an in-progress response", func() {
|
||||||
|
responseID := "resp_cancel_test"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusInProgress,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, false)
|
||||||
|
|
||||||
|
// Cancel the response
|
||||||
|
cancelledResponse, err := store.Cancel(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(cancelledResponse.Status).To(Equal(schema.ORStatusCancelled))
|
||||||
|
Expect(cancelledResponse.CompletedAt).ToNot(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should be idempotent when cancelling already completed response", func() {
|
||||||
|
responseID := "resp_idempotent_cancel"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
completedAt := time.Now().Unix()
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusCompleted,
|
||||||
|
CompletedAt: &completedAt,
|
||||||
|
}
|
||||||
|
|
||||||
|
store.Store(responseID, request, response)
|
||||||
|
|
||||||
|
// Try to cancel a completed response
|
||||||
|
cancelledResponse, err := store.Cancel(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
// Status should remain completed (not changed to cancelled)
|
||||||
|
Expect(cancelledResponse.Status).To(Equal(schema.ORStatusCompleted))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should check if streaming is enabled", func() {
|
||||||
|
responseID := "resp_stream_check"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusQueued,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, true)
|
||||||
|
|
||||||
|
enabled, err := store.IsStreamEnabled(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(enabled).To(BeTrue())
|
||||||
|
|
||||||
|
// Store another without streaming
|
||||||
|
responseID2 := "resp_no_stream"
|
||||||
|
store.StoreBackground(responseID2, request, response, cancel, false)
|
||||||
|
|
||||||
|
enabled2, err := store.IsStreamEnabled(responseID2)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(enabled2).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should notify subscribers of new events", func() {
|
||||||
|
responseID := "resp_events_chan"
|
||||||
|
request := &schema.OpenResponsesRequest{Model: "test"}
|
||||||
|
response := &schema.ORResponseResource{
|
||||||
|
ID: responseID,
|
||||||
|
Object: "response",
|
||||||
|
Status: schema.ORStatusInProgress,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
store.StoreBackground(responseID, request, response, cancel, true)
|
||||||
|
|
||||||
|
eventsChan, err := store.GetEventsChan(responseID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(eventsChan).ToNot(BeNil())
|
||||||
|
|
||||||
|
// Append an event
|
||||||
|
event := &schema.ORStreamEvent{
|
||||||
|
Type: "response.output_text.delta",
|
||||||
|
SequenceNumber: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
store.AppendEvent(responseID, event)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for notification
|
||||||
|
select {
|
||||||
|
case <-eventsChan:
|
||||||
|
// Event received
|
||||||
|
case <-time.After(1 * time.Second):
|
||||||
|
Fail("Timeout waiting for event notification")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -1,13 +1,33 @@
|
|||||||
package http_test
|
package http_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
tmpdir string
|
||||||
|
modelDir string
|
||||||
|
)
|
||||||
|
|
||||||
func TestLocalAI(t *testing.T) {
|
func TestLocalAI(t *testing.T) {
|
||||||
RegisterFailHandler(Fail)
|
RegisterFailHandler(Fail)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
tmpdir, err = os.MkdirTemp("", "")
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
modelDir = filepath.Join(tmpdir, "models")
|
||||||
|
err = os.Mkdir(modelDir, 0750)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
AfterSuite(func() {
|
||||||
|
err := os.RemoveAll(tmpdir)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
RunSpecs(t, "LocalAI HTTP test suite")
|
RunSpecs(t, "LocalAI HTTP test suite")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -484,3 +484,103 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
|
|||||||
}
|
}
|
||||||
return fmt.Errorf("unable to validate configuration after merging")
|
return fmt.Errorf("unable to validate configuration after merging")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (re *RequestExtractor) SetOpenResponsesRequest(c echo.Context) error {
|
||||||
|
input, ok := c.Get(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenResponsesRequest)
|
||||||
|
if !ok || input.Model == "" {
|
||||||
|
return echo.ErrBadRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, ok := c.Get(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
|
||||||
|
if !ok || cfg == nil {
|
||||||
|
return echo.ErrBadRequest
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract or generate the correlation ID (Open Responses uses x-request-id)
|
||||||
|
correlationID := c.Request().Header.Get("x-request-id")
|
||||||
|
if correlationID == "" {
|
||||||
|
correlationID = uuid.New().String()
|
||||||
|
}
|
||||||
|
c.Response().Header().Set("x-request-id", correlationID)
|
||||||
|
|
||||||
|
// Use the request context directly - Echo properly supports context cancellation!
|
||||||
|
reqCtx := c.Request().Context()
|
||||||
|
c1, cancel := context.WithCancel(re.applicationConfig.Context)
|
||||||
|
|
||||||
|
// Cancel when request context is cancelled (client disconnects)
|
||||||
|
go func() {
|
||||||
|
select {
|
||||||
|
case <-reqCtx.Done():
|
||||||
|
cancel()
|
||||||
|
case <-c1.Done():
|
||||||
|
// Already cancelled
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Add the correlation ID to the new context
|
||||||
|
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)
|
||||||
|
|
||||||
|
input.Context = ctxWithCorrelationID
|
||||||
|
input.Cancel = cancel
|
||||||
|
|
||||||
|
err := mergeOpenResponsesRequestAndModelConfig(cfg, input)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.Model == "" {
|
||||||
|
xlog.Debug("replacing empty cfg.Model with input value", "input.Model", input.Model)
|
||||||
|
cfg.Model = input.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Set(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
||||||
|
c.Set(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func mergeOpenResponsesRequestAndModelConfig(config *config.ModelConfig, input *schema.OpenResponsesRequest) error {
|
||||||
|
// Temperature
|
||||||
|
if input.Temperature != nil {
|
||||||
|
config.Temperature = input.Temperature
|
||||||
|
}
|
||||||
|
|
||||||
|
// TopP
|
||||||
|
if input.TopP != nil {
|
||||||
|
config.TopP = input.TopP
|
||||||
|
}
|
||||||
|
|
||||||
|
// MaxOutputTokens -> Maxtokens
|
||||||
|
if input.MaxOutputTokens != nil {
|
||||||
|
config.Maxtokens = input.MaxOutputTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert tools to functions - this will be handled in the endpoint handler
|
||||||
|
// We just validate that tools are present if needed
|
||||||
|
|
||||||
|
// Handle tool_choice
|
||||||
|
if input.ToolChoice != nil {
|
||||||
|
switch tc := input.ToolChoice.(type) {
|
||||||
|
case string:
|
||||||
|
// "auto", "required", or "none"
|
||||||
|
if tc == "required" {
|
||||||
|
config.SetFunctionCallString("required")
|
||||||
|
} else if tc == "none" {
|
||||||
|
// Don't use tools - handled in endpoint
|
||||||
|
}
|
||||||
|
// "auto" is default - let model decide
|
||||||
|
case map[string]interface{}:
|
||||||
|
// Specific tool: {type:"function", name:"..."}
|
||||||
|
if tcType, ok := tc["type"].(string); ok && tcType == "function" {
|
||||||
|
if name, ok := tc["name"].(string); ok {
|
||||||
|
config.SetFunctionCallString(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if valid, _ := config.Validate(); valid {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("unable to validate configuration after merging")
|
||||||
|
}
|
||||||
|
|||||||
1027
core/http/openresponses_test.go
Normal file
1027
core/http/openresponses_test.go
Normal file
File diff suppressed because it is too large
Load Diff
58
core/http/routes/openresponses.go
Normal file
58
core/http/routes/openresponses.go
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
package routes
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/labstack/echo/v4"
|
||||||
|
"github.com/mudler/LocalAI/core/application"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/http/endpoints/openresponses"
|
||||||
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterOpenResponsesRoutes(app *echo.Echo,
|
||||||
|
re *middleware.RequestExtractor,
|
||||||
|
application *application.Application) {
|
||||||
|
|
||||||
|
// Open Responses API endpoint
|
||||||
|
responsesHandler := openresponses.ResponsesEndpoint(
|
||||||
|
application.ModelConfigLoader(),
|
||||||
|
application.ModelLoader(),
|
||||||
|
application.TemplatesEvaluator(),
|
||||||
|
application.ApplicationConfig(),
|
||||||
|
)
|
||||||
|
|
||||||
|
responsesMiddleware := []echo.MiddlewareFunc{
|
||||||
|
middleware.TraceMiddleware(application),
|
||||||
|
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
||||||
|
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenResponsesRequest) }),
|
||||||
|
setOpenResponsesRequestContext(re),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main Open Responses endpoint
|
||||||
|
app.POST("/v1/responses", responsesHandler, responsesMiddleware...)
|
||||||
|
|
||||||
|
// Also support without version prefix for compatibility
|
||||||
|
app.POST("/responses", responsesHandler, responsesMiddleware...)
|
||||||
|
|
||||||
|
// GET /responses/:id - Retrieve a response (for polling background requests)
|
||||||
|
getResponseHandler := openresponses.GetResponseEndpoint()
|
||||||
|
app.GET("/v1/responses/:id", getResponseHandler, middleware.TraceMiddleware(application))
|
||||||
|
app.GET("/responses/:id", getResponseHandler, middleware.TraceMiddleware(application))
|
||||||
|
|
||||||
|
// POST /responses/:id/cancel - Cancel a background response
|
||||||
|
cancelResponseHandler := openresponses.CancelResponseEndpoint()
|
||||||
|
app.POST("/v1/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application))
|
||||||
|
app.POST("/responses/:id/cancel", cancelResponseHandler, middleware.TraceMiddleware(application))
|
||||||
|
}
|
||||||
|
|
||||||
|
// setOpenResponsesRequestContext sets up the context and cancel function for Open Responses requests
|
||||||
|
func setOpenResponsesRequestContext(re *middleware.RequestExtractor) echo.MiddlewareFunc {
|
||||||
|
return func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||||
|
return func(c echo.Context) error {
|
||||||
|
if err := re.SetOpenResponsesRequest(c); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return next(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -28,6 +28,9 @@
|
|||||||
<a href="image/" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] px-2 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[var(--color-bg-secondary)] flex items-center group text-sm">
|
<a href="image/" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] px-2 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[var(--color-bg-secondary)] flex items-center group text-sm">
|
||||||
<i class="fas fa-image text-[var(--color-primary)] mr-1.5 text-sm group-hover:scale-110 transition-transform"></i>Images
|
<i class="fas fa-image text-[var(--color-primary)] mr-1.5 text-sm group-hover:scale-110 transition-transform"></i>Images
|
||||||
</a>
|
</a>
|
||||||
|
<a href="video/" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] px-2 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[var(--color-bg-secondary)] flex items-center group text-sm">
|
||||||
|
<i class="fas fa-video text-[var(--color-primary)] mr-1.5 text-sm group-hover:scale-110 transition-transform"></i>Video
|
||||||
|
</a>
|
||||||
<a href="tts/" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] px-2 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[var(--color-bg-secondary)] flex items-center group text-sm">
|
<a href="tts/" class="text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] px-2 py-2 rounded-lg transition duration-300 ease-in-out hover:bg-[var(--color-bg-secondary)] flex items-center group text-sm">
|
||||||
<i class="fa-solid fa-music text-[var(--color-primary)] mr-1.5 text-sm group-hover:scale-110 transition-transform"></i>TTS
|
<i class="fa-solid fa-music text-[var(--color-primary)] mr-1.5 text-sm group-hover:scale-110 transition-transform"></i>TTS
|
||||||
</a>
|
</a>
|
||||||
@@ -88,6 +91,9 @@
|
|||||||
<a href="image/" class="block text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] hover:bg-[var(--color-bg-secondary)] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center text-sm">
|
<a href="image/" class="block text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] hover:bg-[var(--color-bg-secondary)] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center text-sm">
|
||||||
<i class="fas fa-image text-[var(--color-primary)] mr-3 w-5 text-center text-sm"></i>Images
|
<i class="fas fa-image text-[var(--color-primary)] mr-3 w-5 text-center text-sm"></i>Images
|
||||||
</a>
|
</a>
|
||||||
|
<a href="video/" class="block text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] hover:bg-[var(--color-bg-secondary)] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center text-sm">
|
||||||
|
<i class="fas fa-video text-[var(--color-primary)] mr-3 w-5 text-center text-sm"></i>Video
|
||||||
|
</a>
|
||||||
<a href="tts/" class="block text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] hover:bg-[var(--color-bg-secondary)] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center text-sm">
|
<a href="tts/" class="block text-[var(--color-text-secondary)] hover:text-[var(--color-text-primary)] hover:bg-[var(--color-bg-secondary)] px-3 py-2 rounded-lg transition duration-300 ease-in-out flex items-center text-sm">
|
||||||
<i class="fa-solid fa-music text-[var(--color-primary)] mr-3 w-5 text-center text-sm"></i>TTS
|
<i class="fa-solid fa-music text-[var(--color-primary)] mr-3 w-5 text-center text-sm"></i>TTS
|
||||||
</a>
|
</a>
|
||||||
|
|||||||
@@ -485,6 +485,28 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Open Responses Settings Section -->
|
||||||
|
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-accent)]/20 rounded-lg p-6">
|
||||||
|
<h2 class="text-xl font-semibold text-[var(--color-text-primary)] mb-4 flex items-center">
|
||||||
|
<i class="fas fa-database mr-2 text-[var(--color-accent)] text-sm"></i>
|
||||||
|
Open Responses Settings
|
||||||
|
</h2>
|
||||||
|
<p class="text-xs text-[var(--color-text-secondary)] mb-4">
|
||||||
|
Configure Open Responses API response storage
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div class="space-y-4">
|
||||||
|
<!-- Store TTL -->
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-[var(--color-text-primary)] mb-2">Response Store TTL</label>
|
||||||
|
<p class="text-xs text-[var(--color-text-secondary)] mb-2">Time-to-live for stored responses (e.g., 1h, 30m, 0 = no expiration)</p>
|
||||||
|
<input type="text" x-model="settings.open_responses_store_ttl"
|
||||||
|
placeholder="0"
|
||||||
|
class="w-full px-3 py-2 bg-[var(--color-bg-primary)] border border-[var(--color-accent)]/20 rounded text-sm text-[var(--color-text-primary)] focus:outline-none focus:ring-2 focus:ring-[var(--color-accent)]/50">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- API Keys Settings Section -->
|
<!-- API Keys Settings Section -->
|
||||||
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-error-light)] rounded-lg p-6">
|
<div class="bg-[var(--color-bg-secondary)] border border-[var(--color-error-light)] rounded-lg p-6">
|
||||||
<h2 class="text-xl font-semibold text-[var(--color-text-primary)] mb-4 flex items-center">
|
<h2 class="text-xl font-semibold text-[var(--color-text-primary)] mb-4 flex items-center">
|
||||||
@@ -633,7 +655,8 @@ function settingsDashboard() {
|
|||||||
galleries_json: '[]',
|
galleries_json: '[]',
|
||||||
backend_galleries_json: '[]',
|
backend_galleries_json: '[]',
|
||||||
api_keys_text: '',
|
api_keys_text: '',
|
||||||
agent_job_retention_days: 30
|
agent_job_retention_days: 30,
|
||||||
|
open_responses_store_ttl: '0'
|
||||||
},
|
},
|
||||||
sourceInfo: '',
|
sourceInfo: '',
|
||||||
saving: false,
|
saving: false,
|
||||||
@@ -680,7 +703,8 @@ function settingsDashboard() {
|
|||||||
galleries_json: JSON.stringify(data.galleries || [], null, 2),
|
galleries_json: JSON.stringify(data.galleries || [], null, 2),
|
||||||
backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2),
|
backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2),
|
||||||
api_keys_text: (data.api_keys || []).join('\n'),
|
api_keys_text: (data.api_keys || []).join('\n'),
|
||||||
agent_job_retention_days: data.agent_job_retention_days || 30
|
agent_job_retention_days: data.agent_job_retention_days || 30,
|
||||||
|
open_responses_store_ttl: data.open_responses_store_ttl || '0'
|
||||||
};
|
};
|
||||||
this.sourceInfo = data.source || 'default';
|
this.sourceInfo = data.source || 'default';
|
||||||
} else {
|
} else {
|
||||||
@@ -838,6 +862,9 @@ function settingsDashboard() {
|
|||||||
if (this.settings.agent_job_retention_days !== undefined) {
|
if (this.settings.agent_job_retention_days !== undefined) {
|
||||||
payload.agent_job_retention_days = parseInt(this.settings.agent_job_retention_days) || 30;
|
payload.agent_job_retention_days = parseInt(this.settings.agent_job_retention_days) || 30;
|
||||||
}
|
}
|
||||||
|
if (this.settings.open_responses_store_ttl !== undefined) {
|
||||||
|
payload.open_responses_store_ttl = this.settings.open_responses_store_ttl;
|
||||||
|
}
|
||||||
|
|
||||||
const response = await fetch('/api/settings', {
|
const response = await fetch('/api/settings', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
|
|||||||
306
core/schema/openresponses.go
Normal file
306
core/schema/openresponses.go
Normal file
@@ -0,0 +1,306 @@
|
|||||||
|
package schema
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Status strings for Open Responses responses, as carried in the "status"
// field of ORResponseResource.
const (
	ORStatusQueued     = "queued"
	ORStatusInProgress = "in_progress"
	ORStatusCompleted  = "completed"
	ORStatusFailed     = "failed"
	ORStatusIncomplete = "incomplete"
	ORStatusCancelled  = "cancelled"
)
|
||||||
|
|
||||||
|
// OpenResponsesRequest represents a request to the Open Responses API
|
||||||
|
// https://www.openresponses.org/specification
|
||||||
|
type OpenResponsesRequest struct {
|
||||||
|
Model string `json:"model"`
|
||||||
|
Input interface{} `json:"input"` // string or []ORItemParam
|
||||||
|
Tools []ORFunctionTool `json:"tools,omitempty"`
|
||||||
|
ToolChoice interface{} `json:"tool_choice,omitempty"` // "auto"|"required"|"none"|{type:"function",name:"..."}
|
||||||
|
Stream bool `json:"stream,omitempty"`
|
||||||
|
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||||||
|
Temperature *float64 `json:"temperature,omitempty"`
|
||||||
|
TopP *float64 `json:"top_p,omitempty"`
|
||||||
|
Truncation string `json:"truncation,omitempty"` // "auto"|"disabled"
|
||||||
|
Instructions string `json:"instructions,omitempty"`
|
||||||
|
Reasoning *ORReasoningParam `json:"reasoning,omitempty"`
|
||||||
|
Metadata map[string]string `json:"metadata,omitempty"`
|
||||||
|
PreviousResponseID string `json:"previous_response_id,omitempty"`
|
||||||
|
|
||||||
|
// Additional parameters from spec
|
||||||
|
TextFormat interface{} `json:"text_format,omitempty"` // TextResponseFormat or JsonSchemaResponseFormatParam
|
||||||
|
ServiceTier string `json:"service_tier,omitempty"` // "auto"|"default"|priority hint
|
||||||
|
AllowedTools []string `json:"allowed_tools,omitempty"` // Restrict which tools can be invoked
|
||||||
|
Store *bool `json:"store,omitempty"` // Whether to store the response
|
||||||
|
Include []string `json:"include,omitempty"` // What to include in response
|
||||||
|
ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"` // Allow parallel tool calls
|
||||||
|
PresencePenalty *float64 `json:"presence_penalty,omitempty"` // Presence penalty (-2.0 to 2.0)
|
||||||
|
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` // Frequency penalty (-2.0 to 2.0)
|
||||||
|
TopLogprobs *int `json:"top_logprobs,omitempty"` // Number of top logprobs to return
|
||||||
|
Background *bool `json:"background,omitempty"` // Run request in background
|
||||||
|
MaxToolCalls *int `json:"max_tool_calls,omitempty"` // Maximum number of tool calls
|
||||||
|
|
||||||
|
// OpenAI-compatible extensions (not in Open Responses spec)
|
||||||
|
LogitBias map[string]float64 `json:"logit_bias,omitempty"` // Map of token IDs to bias values (-100 to 100)
|
||||||
|
|
||||||
|
// Internal fields (like OpenAIRequest)
|
||||||
|
Context context.Context `json:"-"`
|
||||||
|
Cancel context.CancelFunc `json:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelName implements the LocalAIRequest interface
|
||||||
|
func (r *OpenResponsesRequest) ModelName(s *string) string {
|
||||||
|
if s != nil {
|
||||||
|
r.Model = *s
|
||||||
|
}
|
||||||
|
return r.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORFunctionTool is the definition of a function tool.
type ORFunctionTool struct {
	// Type is always "function".
	Type        string                 `json:"type"`
	Name        string                 `json:"name"`
	Description string                 `json:"description,omitempty"`
	Parameters  map[string]interface{} `json:"parameters,omitempty"`
	// Strict has no omitempty, so it is always included in the response.
	Strict bool `json:"strict"`
}
|
||||||
|
|
||||||
|
// ORReasoningParam carries the reasoning configuration of a request.
type ORReasoningParam struct {
	// Effort is "none", "low", "medium", "high" or "xhigh".
	Effort string `json:"effort,omitempty"`
	// Summary is "auto", "concise" or "detailed".
	Summary string `json:"summary,omitempty"`
}
|
||||||
|
|
||||||
|
// ORItemParam is an input/output item: a union discriminated by Type, with the
// fields of every variant flattened into one struct.
//
// For the item_reference type, the ID field references the item.
type ORItemParam struct {
	// Type is message, function_call, function_call_output, reasoning or
	// item_reference.
	Type string `json:"type"`
	// ID is present for all output items.
	ID string `json:"id,omitempty"`
	// Status is in_progress, completed or incomplete.
	Status string `json:"status,omitempty"`

	// Message fields.

	// Role is user, assistant, system or developer.
	Role string `json:"role,omitempty"`
	// Content is a string or a []ORContentPart for messages.
	Content interface{} `json:"content,omitempty"`

	// Function call fields.
	CallID    string `json:"call_id,omitempty"`
	Name      string `json:"name,omitempty"`
	Arguments string `json:"arguments,omitempty"`

	// Function call output fields.

	// Output is a string or a []ORContentPart.
	Output interface{} `json:"output,omitempty"`
}
|
||||||
|
|
||||||
|
// ORContentPart represents a content block (discriminated union by type)
|
||||||
|
// For output_text: type, text, annotations, logprobs are ALL REQUIRED per Open Responses spec
|
||||||
|
type ORContentPart struct {
|
||||||
|
Type string `json:"type"` // input_text|input_image|input_file|output_text|refusal
|
||||||
|
Text string `json:"text"` // REQUIRED for output_text - must always be present (even if empty)
|
||||||
|
Annotations []ORAnnotation `json:"annotations"` // REQUIRED for output_text - must always be present (use [])
|
||||||
|
Logprobs []ORLogProb `json:"logprobs"` // REQUIRED for output_text - must always be present (use [])
|
||||||
|
ImageURL string `json:"image_url,omitempty"`
|
||||||
|
FileURL string `json:"file_url,omitempty"`
|
||||||
|
Filename string `json:"filename,omitempty"`
|
||||||
|
FileData string `json:"file_data,omitempty"`
|
||||||
|
Refusal string `json:"refusal,omitempty"`
|
||||||
|
Detail string `json:"detail,omitempty"` // low|high|auto for images
|
||||||
|
}
|
||||||
|
|
||||||
|
// OROutputTextContentPart is an alias for ORContentPart used specifically for output_text
|
||||||
|
type OROutputTextContentPart = ORContentPart
|
||||||
|
|
||||||
|
// ORItemField represents an output item (same structure as ORItemParam)
|
||||||
|
type ORItemField = ORItemParam
|
||||||
|
|
||||||
|
// ORResponseResource represents the main response object
|
||||||
|
type ORResponseResource struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Object string `json:"object"` // always "response"
|
||||||
|
CreatedAt int64 `json:"created_at"`
|
||||||
|
CompletedAt *int64 `json:"completed_at"` // Required: present as number or null
|
||||||
|
Status string `json:"status"` // in_progress|completed|failed|incomplete
|
||||||
|
Model string `json:"model"`
|
||||||
|
Output []ORItemField `json:"output"`
|
||||||
|
Error *ORError `json:"error"` // Always present, null if no error
|
||||||
|
IncompleteDetails *ORIncompleteDetails `json:"incomplete_details"` // Always present, null if complete
|
||||||
|
PreviousResponseID *string `json:"previous_response_id"`
|
||||||
|
Instructions *string `json:"instructions"`
|
||||||
|
|
||||||
|
// Tool-related fields
|
||||||
|
Tools []ORFunctionTool `json:"tools"` // Always present, empty array if no tools
|
||||||
|
ToolChoice interface{} `json:"tool_choice"`
|
||||||
|
ParallelToolCalls bool `json:"parallel_tool_calls"`
|
||||||
|
MaxToolCalls *int `json:"max_tool_calls"` // nullable
|
||||||
|
|
||||||
|
// Sampling parameters (always required)
|
||||||
|
Temperature float64 `json:"temperature"`
|
||||||
|
TopP float64 `json:"top_p"`
|
||||||
|
PresencePenalty float64 `json:"presence_penalty"`
|
||||||
|
FrequencyPenalty float64 `json:"frequency_penalty"`
|
||||||
|
TopLogprobs int `json:"top_logprobs"` // Default to 0
|
||||||
|
MaxOutputTokens *int `json:"max_output_tokens"`
|
||||||
|
|
||||||
|
// Text format configuration
|
||||||
|
Text *ORTextConfig `json:"text"`
|
||||||
|
|
||||||
|
// Truncation and reasoning
|
||||||
|
Truncation string `json:"truncation"`
|
||||||
|
Reasoning *ORReasoning `json:"reasoning"` // nullable
|
||||||
|
|
||||||
|
// Usage statistics
|
||||||
|
Usage *ORUsage `json:"usage"` // nullable
|
||||||
|
|
||||||
|
// Metadata and operational flags
|
||||||
|
Metadata map[string]string `json:"metadata"`
|
||||||
|
Store bool `json:"store"`
|
||||||
|
Background bool `json:"background"`
|
||||||
|
ServiceTier string `json:"service_tier"`
|
||||||
|
|
||||||
|
// Safety and caching
|
||||||
|
SafetyIdentifier *string `json:"safety_identifier"` // nullable
|
||||||
|
PromptCacheKey *string `json:"prompt_cache_key"` // nullable
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORTextConfig represents text format configuration
|
||||||
|
type ORTextConfig struct {
|
||||||
|
Format *ORTextFormat `json:"format,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORTextFormat represents the text format type
|
||||||
|
type ORTextFormat struct {
|
||||||
|
Type string `json:"type"` // "text" or "json_schema"
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORError represents an error in the response
|
||||||
|
type ORError struct {
|
||||||
|
Type string `json:"type"` // invalid_request|not_found|server_error|model_error|too_many_requests
|
||||||
|
Code string `json:"code,omitempty"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
Param string `json:"param,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORUsage represents token usage statistics
|
||||||
|
type ORUsage struct {
|
||||||
|
InputTokens int `json:"input_tokens"`
|
||||||
|
OutputTokens int `json:"output_tokens"`
|
||||||
|
TotalTokens int `json:"total_tokens"`
|
||||||
|
InputTokensDetails *ORInputTokensDetails `json:"input_tokens_details"` // Always present
|
||||||
|
OutputTokensDetails *OROutputTokensDetails `json:"output_tokens_details"` // Always present
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORInputTokensDetails represents input token breakdown
|
||||||
|
type ORInputTokensDetails struct {
|
||||||
|
CachedTokens int `json:"cached_tokens"` // Always include, even if 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// OROutputTokensDetails represents output token breakdown
|
||||||
|
type OROutputTokensDetails struct {
|
||||||
|
ReasoningTokens int `json:"reasoning_tokens"` // Always include, even if 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORReasoning represents reasoning configuration and metadata
|
||||||
|
type ORReasoning struct {
|
||||||
|
Effort string `json:"effort,omitempty"`
|
||||||
|
Summary string `json:"summary,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORIncompleteDetails represents details about why a response was incomplete
|
||||||
|
type ORIncompleteDetails struct {
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORStreamEvent represents a streaming event
|
||||||
|
// Note: Fields like delta, text, logprobs should be set explicitly for events that require them
|
||||||
|
// The sendSSEEvent function uses a custom serializer to handle conditional field inclusion
|
||||||
|
type ORStreamEvent struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
SequenceNumber int `json:"sequence_number"`
|
||||||
|
Response *ORResponseResource `json:"response,omitempty"`
|
||||||
|
OutputIndex *int `json:"output_index,omitempty"`
|
||||||
|
ContentIndex *int `json:"content_index,omitempty"`
|
||||||
|
SummaryIndex *int `json:"summary_index,omitempty"`
|
||||||
|
ItemID string `json:"item_id,omitempty"`
|
||||||
|
Item *ORItemField `json:"item,omitempty"`
|
||||||
|
Part *ORContentPart `json:"part,omitempty"`
|
||||||
|
Delta *string `json:"delta,omitempty"` // Pointer to distinguish unset from empty
|
||||||
|
Text *string `json:"text,omitempty"` // Pointer to distinguish unset from empty
|
||||||
|
Arguments *string `json:"arguments,omitempty"` // Pointer to distinguish unset from empty
|
||||||
|
Refusal string `json:"refusal,omitempty"`
|
||||||
|
Error *ORErrorPayload `json:"error,omitempty"`
|
||||||
|
Logprobs *[]ORLogProb `json:"logprobs,omitempty"` // Pointer to distinguish unset from empty
|
||||||
|
Obfuscation string `json:"obfuscation,omitempty"`
|
||||||
|
Annotation *ORAnnotation `json:"annotation,omitempty"`
|
||||||
|
AnnotationIndex *int `json:"annotation_index,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORErrorPayload represents an error payload in streaming events
|
||||||
|
type ORErrorPayload struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Code string `json:"code,omitempty"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
Param string `json:"param,omitempty"`
|
||||||
|
Headers map[string]string `json:"headers,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORLogProb represents log probability information
|
||||||
|
type ORLogProb struct {
|
||||||
|
Token string `json:"token"`
|
||||||
|
Logprob float64 `json:"logprob"`
|
||||||
|
Bytes []int `json:"bytes"`
|
||||||
|
TopLogprobs []ORTopLogProb `json:"top_logprobs,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORTopLogProb represents a top log probability
|
||||||
|
type ORTopLogProb struct {
|
||||||
|
Token string `json:"token"`
|
||||||
|
Logprob float64 `json:"logprob"`
|
||||||
|
Bytes []int `json:"bytes"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORAnnotation represents an annotation (e.g., URL citation)
|
||||||
|
type ORAnnotation struct {
|
||||||
|
Type string `json:"type"` // url_citation
|
||||||
|
StartIndex int `json:"start_index"`
|
||||||
|
EndIndex int `json:"end_index"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORContentPartWithLogprobs creates an output_text content part with logprobs converted from OpenAI format
|
||||||
|
func ORContentPartWithLogprobs(text string, logprobs *Logprobs) ORContentPart {
|
||||||
|
orLogprobs := []ORLogProb{}
|
||||||
|
|
||||||
|
// Convert OpenAI-style logprobs to Open Responses format
|
||||||
|
if logprobs != nil && len(logprobs.Content) > 0 {
|
||||||
|
for _, lp := range logprobs.Content {
|
||||||
|
// Convert top logprobs
|
||||||
|
topLPs := []ORTopLogProb{}
|
||||||
|
for _, tlp := range lp.TopLogprobs {
|
||||||
|
topLPs = append(topLPs, ORTopLogProb{
|
||||||
|
Token: tlp.Token,
|
||||||
|
Logprob: tlp.Logprob,
|
||||||
|
Bytes: tlp.Bytes,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
orLogprobs = append(orLogprobs, ORLogProb{
|
||||||
|
Token: lp.Token,
|
||||||
|
Logprob: lp.Logprob,
|
||||||
|
Bytes: lp.Bytes,
|
||||||
|
TopLogprobs: topLPs,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ORContentPart{
|
||||||
|
Type: "output_text",
|
||||||
|
Text: text,
|
||||||
|
Annotations: []ORAnnotation{}, // REQUIRED - must always be present as array (empty if none)
|
||||||
|
Logprobs: orLogprobs, // REQUIRED - must always be present as array (empty if none)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -72,6 +72,359 @@ You can list all the models available with:
|
|||||||
curl http://localhost:8080/v1/models
|
curl http://localhost:8080/v1/models
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Anthropic Messages API
|
||||||
|
|
||||||
|
LocalAI supports the Anthropic Messages API, which is compatible with Claude clients. This endpoint provides a structured way to send messages and receive responses, with support for tools, streaming, and multimodal content.
|
||||||
|
|
||||||
|
**Endpoint:** `POST /v1/messages` or `POST /messages`
|
||||||
|
|
||||||
|
**Reference:** https://docs.anthropic.com/claude/reference/messages_post
|
||||||
|
|
||||||
|
#### Basic Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/messages \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "anthropic-version: 2023-06-01" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Say this is a test!"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Request Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Required | Description |
|
||||||
|
|-----------|------|----------|-------------|
|
||||||
|
| `model` | string | Yes | The model identifier |
|
||||||
|
| `messages` | array | Yes | Array of message objects with `role` and `content` |
|
||||||
|
| `max_tokens` | integer | Yes | Maximum number of tokens to generate (must be > 0) |
|
||||||
|
| `system` | string | No | System message to set the assistant's behavior |
|
||||||
|
| `temperature` | float | No | Sampling temperature (0.0 to 1.0) |
|
||||||
|
| `top_p` | float | No | Nucleus sampling parameter |
|
||||||
|
| `top_k` | integer | No | Top-k sampling parameter |
|
||||||
|
| `stop_sequences` | array | No | Array of strings that will stop generation |
|
||||||
|
| `stream` | boolean | No | Enable streaming responses |
|
||||||
|
| `tools` | array | No | Array of tool definitions for function calling |
|
||||||
|
| `tool_choice` | string/object | No | Tool choice strategy: "auto", "any", "none", or specific tool |
|
||||||
|
| `metadata` | object | No | Custom metadata to attach to the request |
|
||||||
|
|
||||||
|
#### Message Format
|
||||||
|
|
||||||
|
Messages can contain text or structured content blocks:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/messages \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What is in this image?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": "image/jpeg",
|
||||||
|
"data": "base64_encoded_image_data"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Tool Calling
|
||||||
|
|
||||||
|
The Anthropic API supports function calling through tools:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/messages \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"name": "get_weather",
|
||||||
|
"description": "Get the current weather",
|
||||||
|
"input_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["location"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tool_choice": "auto",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "What is the weather in San Francisco?"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
|
||||||
|
Enable streaming responses by setting `stream: true`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/messages \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"stream": true,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Tell me a story"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Streaming responses use Server-Sent Events (SSE) format with event types: `message_start`, `content_block_start`, `content_block_delta`, `content_block_stop`, `message_delta`, and `message_stop`.
|
||||||
|
|
||||||
|
#### Response Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "msg_abc123",
|
||||||
|
"type": "message",
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "This is a test!"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"stop_reason": "end_turn",
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 10,
|
||||||
|
"output_tokens": 5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Open Responses API
|
||||||
|
|
||||||
|
LocalAI supports the Open Responses API specification, which provides a standardized interface for AI model interactions with support for background processing, streaming, tool calling, and advanced features like reasoning.
|
||||||
|
|
||||||
|
**Endpoint:** `POST /v1/responses` or `POST /responses`
|
||||||
|
|
||||||
|
**Reference:** https://www.openresponses.org/specification
|
||||||
|
|
||||||
|
#### Basic Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"input": "Say this is a test!",
|
||||||
|
"max_output_tokens": 1024
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Request Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Required | Description |
|
||||||
|
|-----------|------|----------|-------------|
|
||||||
|
| `model` | string | Yes | The model identifier |
|
||||||
|
| `input` | string/array | Yes | Input text or array of input items |
|
||||||
|
| `max_output_tokens` | integer | No | Maximum number of tokens to generate |
|
||||||
|
| `temperature` | float | No | Sampling temperature |
|
||||||
|
| `top_p` | float | No | Nucleus sampling parameter |
|
||||||
|
| `instructions` | string | No | System instructions |
|
||||||
|
| `tools` | array | No | Array of tool definitions |
|
||||||
|
| `tool_choice` | string/object | No | Tool choice: "auto", "required", "none", or specific tool |
|
||||||
|
| `stream` | boolean | No | Enable streaming responses |
|
||||||
|
| `background` | boolean | No | Run request in background (returns immediately) |
|
||||||
|
| `store` | boolean | No | Whether to store the response |
|
||||||
|
| `reasoning` | object | No | Reasoning configuration with `effort` and `summary` |
|
||||||
|
| `parallel_tool_calls` | boolean | No | Allow parallel tool calls |
|
||||||
|
| `max_tool_calls` | integer | No | Maximum number of tool calls |
|
||||||
|
| `presence_penalty` | float | No | Presence penalty (-2.0 to 2.0) |
|
||||||
|
| `frequency_penalty` | float | No | Frequency penalty (-2.0 to 2.0) |
|
||||||
|
| `top_logprobs` | integer | No | Number of top logprobs to return |
|
||||||
|
| `truncation` | string | No | Truncation mode: "auto" or "disabled" |
|
||||||
|
| `text_format` | object | No | Text format configuration |
|
||||||
|
| `metadata` | object | No | Custom metadata |
|
||||||
|
|
||||||
|
#### Input Format
|
||||||
|
|
||||||
|
Input can be a simple string or an array of structured items:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"input": [
|
||||||
|
{
|
||||||
|
"type": "message",
|
||||||
|
"role": "user",
|
||||||
|
"content": "What is the weather?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_output_tokens": 1024
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Background Processing
|
||||||
|
|
||||||
|
Run requests in the background for long-running tasks:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"input": "Generate a long story",
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"background": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
The response will include a response ID that can be used to poll for completion:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "resp_abc123",
|
||||||
|
"object": "response",
|
||||||
|
"status": "in_progress",
|
||||||
|
"created_at": 1234567890
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Retrieving Background Responses
|
||||||
|
|
||||||
|
Use the GET endpoint to retrieve background responses:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get response by ID
|
||||||
|
curl http://localhost:8080/v1/responses/resp_abc123
|
||||||
|
|
||||||
|
# Resume streaming with query parameters
|
||||||
|
curl "http://localhost:8080/v1/responses/resp_abc123?stream=true&starting_after=10"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Canceling Background Responses
|
||||||
|
|
||||||
|
Cancel a background response that's still in progress:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8080/v1/responses/resp_abc123/cancel
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Tool Calling
|
||||||
|
|
||||||
|
Open Responses API supports function calling with tools:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"input": "What is the weather in San Francisco?",
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"name": "get_weather",
|
||||||
|
"description": "Get the current weather",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["location"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tool_choice": "auto",
|
||||||
|
"max_output_tokens": 1024
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Reasoning Configuration
|
||||||
|
|
||||||
|
Configure reasoning effort and summary style:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"input": "Solve this complex problem step by step",
|
||||||
|
"reasoning": {
|
||||||
|
"effort": "high",
|
||||||
|
"summary": "detailed"
|
||||||
|
},
|
||||||
|
"max_output_tokens": 2048
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "resp_abc123",
|
||||||
|
"object": "response",
|
||||||
|
"created_at": 1234567890,
|
||||||
|
"completed_at": 1234567895,
|
||||||
|
"status": "completed",
|
||||||
|
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||||
|
"output": [
|
||||||
|
{
|
||||||
|
"type": "message",
|
||||||
|
"id": "msg_001",
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "output_text",
|
||||||
|
"text": "This is a test!",
|
||||||
|
"annotations": [],
|
||||||
|
"logprobs": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"status": "completed"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"error": null,
|
||||||
|
"incomplete_details": null,
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_p": 1.0,
|
||||||
|
"presence_penalty": 0.0,
|
||||||
|
"frequency_penalty": 0.0,
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 10,
|
||||||
|
"output_tokens": 5,
|
||||||
|
"total_tokens": 15,
|
||||||
|
"input_tokens_details": {
|
||||||
|
"cached_tokens": 0
|
||||||
|
},
|
||||||
|
"output_tokens_details": {
|
||||||
|
"reasoning_tokens": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Backends
|
## Backends
|
||||||
|
|
||||||
### RWKV
|
### RWKV
|
||||||
|
|||||||
@@ -112,6 +112,66 @@ curl http://localhost:8080/v1/chat/completions \
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
### Anthropic Messages API
|
||||||
|
|
||||||
|
LocalAI supports the Anthropic Messages API for Claude-compatible models. [Anthropic documentation](https://docs.anthropic.com/claude/reference/messages_post).
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/messages \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "anthropic-version: 2023-06-01" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "How are you doing?"}
|
||||||
|
],
|
||||||
|
"temperature": 0.7
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### Open Responses API
|
||||||
|
|
||||||
|
LocalAI supports the Open Responses API specification with support for background processing, streaming, and advanced features. [Open Responses documentation](https://www.openresponses.org/specification).
|
||||||
|
|
||||||
|
<details>
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"input": "Say this is a test!",
|
||||||
|
"max_output_tokens": 1024,
|
||||||
|
"temperature": 0.7
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
For background processing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"input": "Generate a long story",
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"background": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Then retrieve the response:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/v1/responses/<response_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
### Image Generation
|
### Image Generation
|
||||||
|
|
||||||
Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).
|
Creates an image given a prompt. [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v3.9.0"
|
"version": "v3.10.0"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
This description emphasizes its capabilities, efficiency, and versatility for multimodal search tasks.
|
This description emphasizes its capabilities, efficiency, and versatility for multimodal search tasks.
|
||||||
overrides:
|
overrides:
|
||||||
|
reranking: true
|
||||||
parameters:
|
parameters:
|
||||||
model: llama-cpp/models/Qwen3-VL-Reranker-8B.Q4_K_M.gguf
|
model: llama-cpp/models/Qwen3-VL-Reranker-8B.Q4_K_M.gguf
|
||||||
name: Qwen3-VL-Reranker-8B-GGUF
|
name: Qwen3-VL-Reranker-8B-GGUF
|
||||||
@@ -3822,6 +3823,41 @@
|
|||||||
- filename: boomerang-qwen3-4.9B.Q4_K_M.gguf
|
- filename: boomerang-qwen3-4.9B.Q4_K_M.gguf
|
||||||
sha256: 11e6c068351d104dee31dd63550e5e2fc9be70467c1cfc07a6f84030cb701537
|
sha256: 11e6c068351d104dee31dd63550e5e2fc9be70467c1cfc07a6f84030cb701537
|
||||||
uri: huggingface://mradermacher/boomerang-qwen3-4.9B-GGUF/boomerang-qwen3-4.9B.Q4_K_M.gguf
|
uri: huggingface://mradermacher/boomerang-qwen3-4.9B-GGUF/boomerang-qwen3-4.9B.Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen3
|
||||||
|
name: "qwen3-coder-30b-a3b-instruct"
|
||||||
|
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png
|
||||||
|
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct
|
||||||
|
- https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF
|
||||||
|
description: |
|
||||||
|
Qwen3-Coder is available in multiple sizes. Today, we're excited to introduce Qwen3-Coder-30B-A3B-Instruct. This streamlined model maintains impressive performance and efficiency, featuring the following key enhancements:
|
||||||
|
|
||||||
|
- Significant Performance among open models on Agentic Coding, Agentic Browser-Use, and other foundational coding tasks.
|
||||||
|
- Long-context Capabilities with native support for 256K tokens, extendable up to 1M tokens using Yarn, optimized for repository-scale understanding.
|
||||||
|
- Agentic Coding support for most platforms, such as Qwen Code and CLINE, featuring a specially designed function call format.
|
||||||
|
|
||||||
|
|
||||||
|
Model Overview:
|
||||||
|
Qwen3-Coder-30B-A3B-Instruct has the following features:
|
||||||
|
|
||||||
|
- Type: Causal Language Models
|
||||||
|
- Training Stage: Pretraining & Post-training
|
||||||
|
- Number of Parameters: 30.5B in total and 3.3B activated
|
||||||
|
- Number of Layers: 48
|
||||||
|
- Number of Attention Heads (GQA): 32 for Q and 4 for KV
|
||||||
|
- Number of Experts: 128
|
||||||
|
- Number of Activated Experts: 8
|
||||||
|
- Context Length: 262,144 natively.
|
||||||
|
|
||||||
|
NOTE: This model supports only non-thinking mode and does not generate <think></think> blocks in its output. Meanwhile, specifying enable_thinking=False is no longer required.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: fadc3e5f8d42bf7e894a785b05082e47daee4df26680389817e2093056f088ad
|
||||||
|
uri: huggingface://unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf
|
||||||
- &gemma3
|
- &gemma3
|
||||||
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
|
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
|
||||||
name: "gemma-3-27b-it"
|
name: "gemma-3-27b-it"
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -32,7 +32,7 @@ require (
|
|||||||
github.com/mholt/archiver/v3 v3.5.1
|
github.com/mholt/archiver/v3 v3.5.1
|
||||||
github.com/microcosm-cc/bluemonday v1.0.27
|
github.com/microcosm-cc/bluemonday v1.0.27
|
||||||
github.com/modelcontextprotocol/go-sdk v1.2.0
|
github.com/modelcontextprotocol/go-sdk v1.2.0
|
||||||
github.com/mudler/cogito v0.7.2
|
github.com/mudler/cogito v0.8.1
|
||||||
github.com/mudler/edgevpn v0.31.1
|
github.com/mudler/edgevpn v0.31.1
|
||||||
github.com/mudler/go-processmanager v0.1.0
|
github.com/mudler/go-processmanager v0.1.0
|
||||||
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2
|
github.com/mudler/memory v0.0.0-20251216220809-d1256471a6c2
|
||||||
|
|||||||
4
go.sum
4
go.sum
@@ -507,8 +507,8 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
|
|||||||
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||||
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
|
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
|
||||||
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||||
github.com/mudler/cogito v0.7.2 h1:J5eHZPsxpoKcnYUfogje5u0nnzGww7ytv7nSn1DMpms=
|
github.com/mudler/cogito v0.8.1 h1:66qPJkAMrq/Vo8AC/PvXWuVxYPhi7X2DQuJIilL8+3I=
|
||||||
github.com/mudler/cogito v0.7.2/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
github.com/mudler/cogito v0.8.1/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
|
||||||
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
|
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
|
||||||
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
|
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
|
||||||
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
|
||||||
|
|||||||
@@ -1,114 +0,0 @@
|
|||||||
package functions
|
|
||||||
|
|
||||||
import (
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ExtractReasoning splits content into the text found inside reasoning tags
// and the text that remains once those tagged blocks are removed.
//
// Recognized tags are <thinking>...</thinking> and <think>...</think>. Blocks
// are consumed left to right; at each step the tag that opens earliest wins.
// An opening tag without a matching closing tag swallows everything up to the
// end of the input. Each block is whitespace-trimmed and blocks that are empty
// after trimming are dropped. The surviving blocks are joined with a blank
// line ("\n\n"). Text outside the blocks is concatenated unchanged, so
// whitespace on either side of a removed block is preserved.
func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
	if len(content) == 0 {
		return "", content
	}

	// Opening/closing marker pairs, checked in this order on every pass.
	markers := [...][2]string{
		{"<thinking>", "</thinking>"},
		{"<think>", "</think>"},
	}

	var (
		thoughts []string        // trimmed bodies of the reasoning blocks
		visible  strings.Builder // everything outside the reasoning blocks
		cursor   int             // first byte of content not yet consumed
	)

	for {
		// Locate the block whose opening marker appears first at or after cursor.
		blockStart, bodyStart, bodyEnd, resumeAt := -1, 0, 0, 0
		for _, pair := range markers {
			openTag, closeTag := pair[0], pair[1]

			rel := strings.Index(content[cursor:], openTag)
			if rel < 0 {
				continue
			}
			at := cursor + rel
			if blockStart != -1 && at >= blockStart {
				// A previously examined marker already opens at least as early.
				continue
			}

			blockStart = at
			bodyStart = at + len(openTag)
			if relClose := strings.Index(content[bodyStart:], closeTag); relClose >= 0 {
				bodyEnd = bodyStart + relClose
				resumeAt = bodyEnd + len(closeTag)
			} else {
				// Unclosed block: the body runs to the end of the input.
				bodyEnd = len(content)
				resumeAt = len(content)
			}
		}

		if blockStart == -1 {
			// No further blocks: the tail is plain content.
			visible.WriteString(content[cursor:])
			break
		}

		visible.WriteString(content[cursor:blockStart])
		if body := strings.TrimSpace(content[bodyStart:bodyEnd]); body != "" {
			thoughts = append(thoughts, body)
		}
		cursor = resumeAt
	}

	return strings.Join(thoughts, "\n\n"), visible.String()
}
|
|
||||||
@@ -1,261 +0,0 @@
|
|||||||
package functions_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
. "github.com/mudler/LocalAI/pkg/functions"
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
)
|
|
||||||
|
|
||||||
var _ = Describe("ExtractReasoning", func() {
|
|
||||||
Context("when content has no reasoning tags", func() {
|
|
||||||
It("should return empty reasoning and original content", func() {
|
|
||||||
content := "This is regular content without any tags."
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal(content))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle empty string", func() {
|
|
||||||
content := ""
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(BeEmpty())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle content with only whitespace", func() {
|
|
||||||
content := " \n\t "
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal(content))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has <thinking> tags", func() {
|
|
||||||
It("should extract reasoning from single thinking block", func() {
|
|
||||||
content := "Some text <thinking>This is my reasoning</thinking> More text"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("This is my reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("Some text More text"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should extract reasoning and preserve surrounding content", func() {
|
|
||||||
content := "Before <thinking>Reasoning here</thinking> After"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Reasoning here"))
|
|
||||||
Expect(cleaned).To(Equal("Before After"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle thinking block at the start", func() {
|
|
||||||
content := "<thinking>Start reasoning</thinking> Regular content"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Start reasoning"))
|
|
||||||
Expect(cleaned).To(Equal(" Regular content"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle thinking block at the end", func() {
|
|
||||||
content := "Regular content <thinking>End reasoning</thinking>"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("End reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("Regular content "))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle only thinking block", func() {
|
|
||||||
content := "<thinking>Only reasoning</thinking>"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Only reasoning"))
|
|
||||||
Expect(cleaned).To(BeEmpty())
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should trim whitespace from reasoning content", func() {
|
|
||||||
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Reasoning with spaces"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has <think> tags", func() {
|
|
||||||
It("should extract reasoning from redacted_reasoning block", func() {
|
|
||||||
content := "Text <think>Redacted reasoning</think> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Redacted reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle redacted_reasoning with multiline content", func() {
|
|
||||||
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
|
||||||
Expect(cleaned).To(Equal("Before After"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle redacted_reasoning with complex content", func() {
|
|
||||||
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
|
|
||||||
Expect(cleaned).To(Equal("Start End"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has multiple reasoning blocks", func() {
|
|
||||||
It("should concatenate multiple thinking blocks with newlines", func() {
|
|
||||||
content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("First\n\nSecond"))
|
|
||||||
Expect(cleaned).To(Equal("Text Middle End"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle multiple different tag types", func() {
|
|
||||||
content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(ContainSubstring("One"))
|
|
||||||
Expect(reasoning).To(ContainSubstring("Two"))
|
|
||||||
Expect(reasoning).To(ContainSubstring("Three"))
|
|
||||||
Expect(cleaned).To(Equal("A B C D"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle nested tags correctly (extracts first match)", func() {
|
|
||||||
content := "Text <thinking>Outer <think>Inner</think></thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
// Should extract the outer thinking block
|
|
||||||
Expect(reasoning).To(ContainSubstring("Outer"))
|
|
||||||
Expect(reasoning).To(ContainSubstring("Inner"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has unclosed reasoning tags", func() {
|
|
||||||
It("should extract unclosed thinking block", func() {
|
|
||||||
content := "Text <thinking>Unclosed reasoning"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Unclosed reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("Text "))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should extract unclosed think block", func() {
|
|
||||||
content := "Before <think>Incomplete"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Incomplete"))
|
|
||||||
Expect(cleaned).To(Equal("Before "))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should extract unclosed redacted_reasoning block", func() {
|
|
||||||
content := "Start <think>Partial reasoning content"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Partial reasoning content"))
|
|
||||||
Expect(cleaned).To(Equal("Start "))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle unclosed tag at the end", func() {
|
|
||||||
content := "Regular content <thinking>Unclosed at end"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Unclosed at end"))
|
|
||||||
Expect(cleaned).To(Equal("Regular content "))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has empty reasoning blocks", func() {
|
|
||||||
It("should ignore empty thinking block", func() {
|
|
||||||
content := "Text <thinking></thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should ignore thinking block with only whitespace", func() {
|
|
||||||
content := "Text <thinking> \n\t </thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has reasoning tags with special characters", func() {
|
|
||||||
It("should handle reasoning with newlines", func() {
|
|
||||||
content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
|
||||||
Expect(cleaned).To(Equal("Before After"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle reasoning with code blocks", func() {
|
|
||||||
content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle reasoning with JSON", func() {
|
|
||||||
content := "Before <think>{\"key\": \"value\"}</think> After"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
|
|
||||||
Expect(cleaned).To(Equal("Before After"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle reasoning with HTML-like content", func() {
|
|
||||||
content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("when content has reasoning mixed with regular content", func() {
|
|
||||||
It("should preserve content order correctly", func() {
|
|
||||||
content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(ContainSubstring("Reasoning"))
|
|
||||||
Expect(reasoning).To(ContainSubstring("More reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("Start Middle End"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle reasoning in the middle of a sentence", func() {
|
|
||||||
content := "This is a <thinking>reasoning</thinking> sentence."
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("reasoning"))
|
|
||||||
Expect(cleaned).To(Equal("This is a sentence."))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
Context("edge cases", func() {
|
|
||||||
It("should handle content with only opening tag", func() {
|
|
||||||
content := "<thinking>"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal(""))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle content with only closing tag", func() {
|
|
||||||
content := "</thinking>"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(BeEmpty())
|
|
||||||
Expect(cleaned).To(Equal("</thinking>"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle mismatched tags", func() {
|
|
||||||
content := "<thinking>Content</think>"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
// Should extract unclosed thinking block
|
|
||||||
Expect(reasoning).To(ContainSubstring("Content"))
|
|
||||||
Expect(cleaned).To(Equal(""))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle very long reasoning content", func() {
|
|
||||||
longReasoning := strings.Repeat("This is reasoning content. ", 100)
|
|
||||||
content := "Text <thinking>" + longReasoning + "</thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
// TrimSpace is applied, so we need to account for that
|
|
||||||
Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
|
|
||||||
It("should handle reasoning with unicode characters", func() {
|
|
||||||
content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
|
|
||||||
reasoning, cleaned := ExtractReasoning(content)
|
|
||||||
Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
|
|
||||||
Expect(cleaned).To(Equal("Text More"))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
8
pkg/reasoning/config.go
Normal file
8
pkg/reasoning/config.go
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package reasoning
|
||||||
|
|
||||||
|
// ReasoningConfig holds the user-facing settings for reasoning extraction.
// It is serialised under the same key names in both YAML and JSON.
type ReasoningConfig struct {
	// ThinkingForcedOpen marks a model whose output begins with reasoning
	// text that has no opening tag. When set, everything from the start of
	// the output up to the first closing tag is treated as reasoning.
	// Models like GLM-4 behave this way: they omit <think> but still emit
	// </think>.
	ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
}
|
||||||
18
pkg/reasoning/options.go
Normal file
18
pkg/reasoning/options.go
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
package reasoning
|
||||||
|
|
||||||
|
// options is the internal settings bag that Option values mutate before an
// extraction runs.
type options struct {
	thinkingForcedOpen bool
}

// Option mutates the extraction settings; pass any number of them to the
// extractor.
type Option func(*options)

// WithThinkingForcedOpen returns an Option that switches on forced-open mode:
// the content is considered reasoning from its very first character up to the
// first closing tag. Models like GLM-4 need this because they emit reasoning
// without <think> but still terminate it with </think>.
func WithThinkingForcedOpen() Option {
	enable := func(target *options) {
		target.thinkingForcedOpen = true
	}
	return enable
}
|
||||||
256
pkg/reasoning/reasoning.go
Normal file
256
pkg/reasoning/reasoning.go
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
package reasoning
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// thinkingOpenTags lists the opening reasoning tags recognised at the end of
// a prompt. The set mirrors what llama.cpp detects in common/chat.cpp; the
// variants ending in "\n" cover templates that add a newline after the tag.
var thinkingOpenTags = []string{
	// DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE
	"<think>\n",
	"<think>",
	// Generic thinking tags
	"<thinking>\n",
	"<thinking>",
	// Apertus
	"<|inner_prefix|>",
	// Command R7B
	"<|START_THINKING|>",
	// Seed
	"<seed:think>",
	// Magistral (not in llama.cpp but common)
	"[THINK]\n",
	"[THINK]",
}

// DetectThinkingForcedOpen reports whether prompt ends with one of the known
// opening reasoning tags.
//
// A prompt that already ends with such a tag means the chat template opened
// the reasoning block itself, so the model's output starts directly with
// reasoning text. Only an exact suffix match counts: a tag elsewhere in the
// prompt, or followed by any other character, yields false.
func DetectThinkingForcedOpen(prompt string) bool {
	for i := 0; i < len(thinkingOpenTags); i++ {
		if strings.HasSuffix(prompt, thinkingOpenTags[i]) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// Extract extracts reasoning content from thinking tags and returns
|
||||||
|
// both the extracted reasoning and the cleaned content (with tags removed).
|
||||||
|
// It handles <thinking>...</thinking> and <think>...</think> tags.
|
||||||
|
// Multiple reasoning blocks are concatenated with newlines.
|
||||||
|
// It also handles the case where only a closing tag is present (no opening tag),
|
||||||
|
// in which case everything before the closing tag is treated as reasoning.
|
||||||
|
//
|
||||||
|
// Use WithThinkingForcedOpen() option when all content from the start should be
|
||||||
|
// treated as reasoning until a closing tag is found.
|
||||||
|
func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) {
|
||||||
|
if content == "" {
|
||||||
|
return "", content
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := &options{}
|
||||||
|
for _, opt := range opts {
|
||||||
|
opt(cfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.thinkingForcedOpen {
|
||||||
|
return extractForcedOpen(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
return extractFromTags(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractForcedOpen handles the case where reasoning starts without an opening tag.
|
||||||
|
// All content from the start is treated as reasoning until a closing tag is found.
|
||||||
|
func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
|
||||||
|
// Look for the earliest closing tag
|
||||||
|
// These match the closing tags used by llama.cpp for various models
|
||||||
|
closingTags := []string{
|
||||||
|
"</thinking>",
|
||||||
|
"</think>",
|
||||||
|
"<|END_THINKING|>", // Command R7B
|
||||||
|
"<|inner_suffix|>", // Apertus
|
||||||
|
"</seed:think>", // Seed
|
||||||
|
"[/THINK]", // Magistral
|
||||||
|
}
|
||||||
|
|
||||||
|
earliestCloseIdx := -1
|
||||||
|
var matchedCloseTag string
|
||||||
|
|
||||||
|
for _, closeTag := range closingTags {
|
||||||
|
idx := strings.Index(content, closeTag)
|
||||||
|
if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) {
|
||||||
|
earliestCloseIdx = idx
|
||||||
|
matchedCloseTag = closeTag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if earliestCloseIdx == -1 {
|
||||||
|
// No closing tag found - all content is reasoning (still streaming)
|
||||||
|
return strings.TrimSpace(content), ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Found closing tag - everything before is reasoning, everything after is content
|
||||||
|
reasoning = strings.TrimSpace(content[:earliestCloseIdx])
|
||||||
|
cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):]
|
||||||
|
|
||||||
|
// Continue processing the rest for any additional reasoning blocks
|
||||||
|
if cleanedContent != "" {
|
||||||
|
additionalReasoning, finalContent := extractFromTags(cleanedContent)
|
||||||
|
if additionalReasoning != "" {
|
||||||
|
if reasoning != "" {
|
||||||
|
reasoning = reasoning + "\n\n" + additionalReasoning
|
||||||
|
} else {
|
||||||
|
reasoning = additionalReasoning
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cleanedContent = finalContent
|
||||||
|
}
|
||||||
|
|
||||||
|
return reasoning, cleanedContent
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractFromTags is the core tag-based extractor. It scans content from left
// to right, repeatedly picking the next reasoning section and splitting the
// text into reasoning and regular content.
//
// Three kinds of section are recognised for every known tag pair:
//   - a complete pair (opening tag ... closing tag): the text between the tags
//     is reasoning, and the text in front of the opening tag is content;
//   - an unclosed opening tag: everything after it, up to the end of the
//     input, is reasoning;
//   - a closing tag with no opening tag in front of it: everything from the
//     current scan position up to that closing tag is reasoning.
//
// On each pass the section whose triggering tag occurs earliest in the input
// wins (the opening tag for the first two kinds, the closing tag for the
// third). Ranking every candidate by that offset keeps the result independent
// of the order in which the tag pairs are declared.
//
// Each reasoning section is whitespace-trimmed; empty sections are dropped and
// the rest are joined with a blank line. Content outside the sections is
// concatenated verbatim, so whitespace around a removed block is preserved.
func extractFromTags(content string) (reasoning string, cleanedContent string) {
	if content == "" {
		return "", content
	}

	// Tag pairs to look for; these match the tags used by llama.cpp for
	// various models.
	tagPairs := []struct {
		start string
		end   string
	}{
		{"<thinking>", "</thinking>"},
		{"<think>", "</think>"},
		{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B
		{"<|inner_prefix|>", "<|inner_suffix|>"},   // Apertus
		{"<seed:think>", "</seed:think>"},          // Seed
		{"[THINK]", "[/THINK]"},                    // Magistral
	}

	var reasoningParts []string
	var cleanedParts []string

	// lastPos is the absolute offset up to which content has been consumed.
	lastPos := 0

	for {
		// Best candidate of this pass. All offsets are absolute.
		found := false
		triggerPos := 0 // offset of the tag that produced the candidate
		spanStart := 0  // first byte consumed by the candidate
		spanEnd := 0    // one past the last byte consumed by the candidate
		innerStart := 0 // start of the reasoning text
		innerEnd := 0   // end of the reasoning text

		// consider keeps the candidate with the smallest trigger offset. On a
		// tie the candidate seen first (earlier in tagPairs) is kept.
		consider := func(trigger, sStart, sEnd, iStart, iEnd int) {
			if found && trigger >= triggerPos {
				return
			}
			found = true
			triggerPos = trigger
			spanStart, spanEnd = sStart, sEnd
			innerStart, innerEnd = iStart, iEnd
		}

		rest := content[lastPos:]
		for _, pair := range tagPairs {
			openRel := strings.Index(rest, pair.start)
			closeRel := strings.Index(rest, pair.end)

			// Closing tag that appears before, or without, its opening tag:
			// the text from the scan position up to it is reasoning.
			if closeRel != -1 && (openRel == -1 || closeRel < openRel) {
				closeAbs := lastPos + closeRel
				consider(closeAbs, lastPos, closeAbs+len(pair.end), lastPos, closeAbs)
				continue
			}

			if openRel == -1 {
				continue
			}

			openAbs := lastPos + openRel
			bodyStart := openAbs + len(pair.start)

			// Look for the matching closing tag after the opening tag.
			closeAfter := strings.Index(content[bodyStart:], pair.end)
			if closeAfter == -1 {
				// Unclosed tag: reasoning runs to the end of the input.
				consider(openAbs, openAbs, len(content), bodyStart, len(content))
				continue
			}

			closeAbs := bodyStart + closeAfter
			consider(openAbs, openAbs, closeAbs+len(pair.end), bodyStart, closeAbs)
		}

		if !found {
			// No more tags: whatever is left is regular content.
			if lastPos < len(content) {
				cleanedParts = append(cleanedParts, content[lastPos:])
			}
			break
		}

		// Text in front of an opening tag is regular content. For a
		// closing-only section spanStart equals lastPos, so nothing is added.
		if spanStart > lastPos {
			cleanedParts = append(cleanedParts, content[lastPos:spanStart])
		}

		if innerEnd > innerStart {
			if text := strings.TrimSpace(content[innerStart:innerEnd]); text != "" {
				reasoningParts = append(reasoningParts, text)
			}
		}

		// Move past the consumed section. spanEnd is always greater than
		// lastPos (or equal to len(content)), so the loop terminates.
		lastPos = spanEnd
	}

	reasoning = strings.Join(reasoningParts, "\n\n")
	cleanedContent = strings.Join(cleanedParts, "")

	return reasoning, cleanedContent
}
|
||||||
13
pkg/reasoning/reasoning_suite_test.go
Normal file
13
pkg/reasoning/reasoning_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package reasoning_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestReasoning(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "Reasoning Suite")
|
||||||
|
}
|
||||||
499
pkg/reasoning/reasoning_test.go
Normal file
499
pkg/reasoning/reasoning_test.go
Normal file
@@ -0,0 +1,499 @@
|
|||||||
|
package reasoning_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
. "github.com/mudler/LocalAI/pkg/reasoning"
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("DetectThinkingForcedOpen", func() {
|
||||||
|
It("should detect <think> at end of prompt", func() {
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<think>")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<think>\n")).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should detect <thinking> at end of prompt", func() {
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<thinking>")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<thinking>\n")).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should detect model-specific tags", func() {
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt<seed:think>")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue())
|
||||||
|
Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should not detect if tag is in the middle", func() {
|
||||||
|
Expect(DetectThinkingForcedOpen("Some <think> prompt")).To(BeFalse())
|
||||||
|
Expect(DetectThinkingForcedOpen("<think>reasoning</think>")).To(BeFalse())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should not detect if no thinking tag", func() {
|
||||||
|
Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse())
|
||||||
|
Expect(DetectThinkingForcedOpen("")).To(BeFalse())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
var _ = Describe("Extract", func() {
|
||||||
|
Context("when content has no reasoning tags", func() {
|
||||||
|
It("should return empty reasoning and original content", func() {
|
||||||
|
content := "This is regular content without any tags."
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(BeEmpty())
|
||||||
|
Expect(cleaned).To(Equal(content))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle empty string", func() {
|
||||||
|
content := ""
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(BeEmpty())
|
||||||
|
Expect(cleaned).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle content with only whitespace", func() {
|
||||||
|
content := " \n\t "
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(BeEmpty())
|
||||||
|
Expect(cleaned).To(Equal(content))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("when content has <thinking> tags", func() {
|
||||||
|
It("should extract reasoning from single thinking block", func() {
|
||||||
|
content := "Some text <thinking>This is my reasoning</thinking> More text"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("This is my reasoning"))
|
||||||
|
Expect(cleaned).To(Equal("Some text More text"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should extract reasoning and preserve surrounding content", func() {
|
||||||
|
content := "Before <thinking>Reasoning here</thinking> After"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Reasoning here"))
|
||||||
|
Expect(cleaned).To(Equal("Before After"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle thinking block at the start", func() {
|
||||||
|
content := "<thinking>Start reasoning</thinking> Regular content"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Start reasoning"))
|
||||||
|
Expect(cleaned).To(Equal(" Regular content"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle thinking block at the end", func() {
|
||||||
|
content := "Regular content <thinking>End reasoning</thinking>"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("End reasoning"))
|
||||||
|
Expect(cleaned).To(Equal("Regular content "))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle only thinking block", func() {
|
||||||
|
content := "<thinking>Only reasoning</thinking>"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Only reasoning"))
|
||||||
|
Expect(cleaned).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should trim whitespace from reasoning content", func() {
|
||||||
|
content := "Text <thinking> \n Reasoning with spaces \n </thinking> More"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Reasoning with spaces"))
|
||||||
|
Expect(cleaned).To(Equal("Text More"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("when content has <think> tags", func() {
|
||||||
|
It("should extract reasoning from redacted_reasoning block", func() {
|
||||||
|
content := "Text <think>Redacted reasoning</think> More"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Redacted reasoning"))
|
||||||
|
Expect(cleaned).To(Equal("Text More"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle redacted_reasoning with multiline content", func() {
|
||||||
|
content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
|
||||||
|
Expect(cleaned).To(Equal("Before After"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should handle redacted_reasoning with complex content", func() {
|
||||||
|
content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
|
||||||
|
reasoning, cleaned := Extract(content)
|
||||||
|
Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
|
||||||
|
Expect(cleaned).To(Equal("Start End"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Specs for inputs that carry more than one reasoning block.
// NOTE(review): the expected cleaned strings below show single spaces where
// removing a tag would leave two; confirm against Extract's whitespace handling.
Context("when content has multiple reasoning blocks", func() {
	// Two blocks with the same tag are joined by a blank line.
	It("should concatenate multiple thinking blocks with newlines", func() {
		thoughts, remainder := Extract("Text <thinking>First</thinking> Middle <thinking>Second</thinking> End")

		Expect(thoughts).To(Equal("First\n\nSecond"))
		Expect(remainder).To(Equal("Text Middle End"))
	})

	// <thinking> and <think> blocks may be mixed in a single input.
	It("should handle multiple different tag types", func() {
		thoughts, remainder := Extract("A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D")

		// Only presence is asserted here, not the exact joined form.
		for _, fragment := range []string{"One", "Two", "Three"} {
			Expect(thoughts).To(ContainSubstring(fragment))
		}
		Expect(remainder).To(Equal("A B C D"))
	})

	// A <think> block nested inside a <thinking> block.
	It("should handle nested tags correctly (extracts first match)", func() {
		thoughts, remainder := Extract("Text <thinking>Outer <think>Inner</think></thinking> More")

		// Should extract the outer thinking block
		Expect(thoughts).To(ContainSubstring("Outer"))
		Expect(thoughts).To(ContainSubstring("Inner"))
		Expect(remainder).To(Equal("Text More"))
	})
})
|
||||||
|
|
||||||
|
// Specs for inputs whose opening reasoning tag is never closed (for example a
// response that is still streaming): everything after the opening tag is
// expected as reasoning, everything before it as cleaned content.
Context("when content has unclosed reasoning tags", func() {
	It("should extract unclosed thinking block", func() {
		input := "Text <thinking>Unclosed reasoning"

		thoughts, remainder := Extract(input)

		Expect(thoughts).To(Equal("Unclosed reasoning"))
		// The text before the tag is kept verbatim, trailing space included.
		Expect(remainder).To(Equal("Text "))
	})

	It("should extract unclosed think block", func() {
		input := "Before <think>Incomplete"

		thoughts, remainder := Extract(input)

		Expect(thoughts).To(Equal("Incomplete"))
		Expect(remainder).To(Equal("Before "))
	})

	// NOTE(review): this spec was previously titled "redacted_reasoning" although
	// the input uses a <think> tag. The title now describes what is tested; if a
	// <redacted_reasoning> tag was meant to be covered, change the input instead.
	It("should extract unclosed think block with multi-word content", func() {
		input := "Start <think>Partial reasoning content"

		thoughts, remainder := Extract(input)

		Expect(thoughts).To(Equal("Partial reasoning content"))
		Expect(remainder).To(Equal("Start "))
	})

	It("should handle unclosed tag at the end", func() {
		input := "Regular content <thinking>Unclosed at end"

		thoughts, remainder := Extract(input)

		Expect(thoughts).To(Equal("Unclosed at end"))
		Expect(remainder).To(Equal("Regular content "))
	})
})
|
||||||
|
|
||||||
|
// Specs for reasoning blocks that contain nothing, or only whitespace: no
// reasoning is reported, but the tags are still removed from the content.
// NOTE(review): the literals below show single spaces where removing a tag
// would leave two; confirm against Extract's whitespace handling.
Context("when content has empty reasoning blocks", func() {
	It("should ignore empty thinking block", func() {
		thoughts, remainder := Extract("Text <thinking></thinking> More")

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(Equal("Text More"))
	})

	It("should ignore thinking block with only whitespace", func() {
		// The block body is a mix of spaces, a newline and a tab.
		thoughts, remainder := Extract("Text <thinking> \n\t </thinking> More")

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(Equal("Text More"))
	})
})
|
||||||
|
|
||||||
|
// Specs checking that the body of a reasoning block is returned verbatim even
// when it contains newlines, Markdown fences, JSON, or angle-bracket markup.
Context("when content has reasoning tags with special characters", func() {
	It("should handle reasoning with newlines", func() {
		thoughts, remainder := Extract("Before <thinking>Line 1\nLine 2\nLine 3</thinking> After")

		Expect(thoughts).To(Equal("Line 1\nLine 2\nLine 3"))
		Expect(remainder).To(Equal("Before After"))
	})

	It("should handle reasoning with code blocks", func() {
		// Backtick fences inside the block are kept as-is.
		thoughts, remainder := Extract("Text <thinking>Reasoning with ```code``` blocks</thinking> More")

		Expect(thoughts).To(Equal("Reasoning with ```code``` blocks"))
		Expect(remainder).To(Equal("Text More"))
	})

	It("should handle reasoning with JSON", func() {
		// Quotes and braces in the block body are kept as-is.
		thoughts, remainder := Extract("Before <think>{\"key\": \"value\"}</think> After")

		Expect(thoughts).To(Equal("{\"key\": \"value\"}"))
		Expect(remainder).To(Equal("Before After"))
	})

	It("should handle reasoning with HTML-like content", func() {
		// "<tags>" is not a reasoning tag and stays part of the reasoning text.
		thoughts, remainder := Extract("Text <thinking>Reasoning with <tags> inside</thinking> More")

		Expect(thoughts).To(Equal("Reasoning with <tags> inside"))
		Expect(remainder).To(Equal("Text More"))
	})
})
|
||||||
|
|
||||||
|
// Specs for reasoning blocks interleaved with ordinary text: the text outside
// the blocks must come back in its original order.
// NOTE(review): the literals below show single spaces where removing a tag
// would leave two; confirm against Extract's whitespace handling.
Context("when content has reasoning mixed with regular content", func() {
	It("should preserve content order correctly", func() {
		thoughts, remainder := Extract("Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End")

		// Both blocks contribute to the reasoning; the exact join is not asserted.
		Expect(thoughts).To(ContainSubstring("Reasoning"))
		Expect(thoughts).To(ContainSubstring("More reasoning"))
		Expect(remainder).To(Equal("Start Middle End"))
	})

	It("should handle reasoning in the middle of a sentence", func() {
		thoughts, remainder := Extract("This is a <thinking>reasoning</thinking> sentence.")

		Expect(thoughts).To(Equal("reasoning"))
		Expect(remainder).To(Equal("This is a sentence."))
	})
})
|
||||||
|
|
||||||
|
// Edge-case specs that call Extract with no options: lone tags, closing-only
// tags, mismatched pairs, very long bodies, and non-ASCII text.
Context("edge cases without WithThinkingForcedOpen", func() {
	It("should handle content with only opening tag", func() {
		thoughts, remainder := Extract("<thinking>")

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(Equal(""))
	})

	It("should handle content with only closing tag (no content before)", func() {
		thoughts, remainder := Extract("</thinking>")

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(BeEmpty())
	})

	It("should extract reasoning when only closing tag is present", func() {
		// GLM-4 style: reasoning content followed by closing tag without opening tag
		thoughts, remainder := Extract("This is reasoning content</think>this is the actual response")

		Expect(thoughts).To(Equal("This is reasoning content"))
		Expect(remainder).To(Equal("this is the actual response"))
	})

	It("should handle closing-only tag with multiline reasoning", func() {
		thoughts, remainder := Extract("1. First point\n2. Second point\n3. Third point</think>Final answer")

		Expect(thoughts).To(Equal("1. First point\n2. Second point\n3. Third point"))
		Expect(remainder).To(Equal("Final answer"))
	})

	It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
		thoughts, remainder := Extract("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test")

		Expect(thoughts).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
		Expect(remainder).To(Equal("this is a test"))
	})

	It("should handle closing-only thinking tag", func() {
		thoughts, remainder := Extract("Some reasoning here</thinking>actual content")

		Expect(thoughts).To(Equal("Some reasoning here"))
		Expect(remainder).To(Equal("actual content"))
	})

	It("should handle mismatched tags", func() {
		// Opening <thinking> paired with a closing </think>.
		thoughts, remainder := Extract("<thinking>Content</think>")

		// Should extract unclosed thinking block
		Expect(thoughts).To(ContainSubstring("Content"))
		Expect(remainder).To(Equal(""))
	})

	It("should handle very long reasoning content", func() {
		// 100 repetitions of a sentence that ends in a space.
		body := strings.Repeat("This is reasoning content. ", 100)

		thoughts, remainder := Extract("Text <thinking>" + body + "</thinking> More")

		// TrimSpace is applied, so we need to account for that
		Expect(thoughts).To(Equal(strings.TrimSpace(body)))
		Expect(remainder).To(Equal("Text More"))
	})

	It("should handle reasoning with unicode characters", func() {
		thoughts, remainder := Extract("Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More")

		Expect(thoughts).To(Equal("Reasoning with 中文 and emoji 🧠"))
		Expect(remainder).To(Equal("Text More"))
	})
})
|
||||||
|
|
||||||
|
// Specs that pass WithThinkingForcedOpen(): the input is treated as starting
// inside a reasoning block, so text up to the first closing tag is reasoning.
Context("with WithThinkingForcedOpen option", func() {
	It("should treat all content as reasoning until closing tag", func() {
		thoughts, remainder := Extract("This is reasoning</think>this is content", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("This is reasoning"))
		Expect(remainder).To(Equal("this is content"))
	})

	It("should treat all content as reasoning when no closing tag (streaming)", func() {
		thoughts, remainder := Extract("This is reasoning content still streaming", WithThinkingForcedOpen())

		// With no closing tag the whole input is reasoning.
		Expect(thoughts).To(Equal("This is reasoning content still streaming"))
		Expect(remainder).To(BeEmpty())
	})

	It("should handle GLM-4 style output", func() {
		thoughts, remainder := Extract("**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
		Expect(remainder).To(Equal("this is a test"))
	})

	It("should handle multiline reasoning with closing tag", func() {
		thoughts, remainder := Extract("1. First point\n2. Second point\n3. Third point</think>Final answer", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("1. First point\n2. Second point\n3. Third point"))
		Expect(remainder).To(Equal("Final answer"))
	})

	It("should handle </thinking> closing tag", func() {
		thoughts, remainder := Extract("Some reasoning here</thinking>actual content", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Some reasoning here"))
		Expect(remainder).To(Equal("actual content"))
	})

	It("should handle additional reasoning blocks after initial forced open", func() {
		thoughts, remainder := Extract("Initial reasoning</think>content<think>more reasoning</think>final content", WithThinkingForcedOpen())

		// The forced-open part and the later explicit block are joined by a blank line.
		Expect(thoughts).To(Equal("Initial reasoning\n\nmore reasoning"))
		Expect(remainder).To(Equal("contentfinal content"))
	})

	It("should handle empty content", func() {
		thoughts, remainder := Extract("", WithThinkingForcedOpen())

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(BeEmpty())
	})

	It("should handle only closing tag", func() {
		thoughts, remainder := Extract("</think>only content", WithThinkingForcedOpen())

		Expect(thoughts).To(BeEmpty())
		Expect(remainder).To(Equal("only content"))
	})

	It("should find earliest closing tag", func() {
		// </think> comes before </thinking>
		thoughts, remainder := Extract("Reasoning</think>content</thinking>more", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Reasoning"))
		// The later </thinking> is left untouched in the cleaned content.
		Expect(remainder).To(Equal("content</thinking>more"))
	})

	It("should handle Command R7B closing tag", func() {
		thoughts, remainder := Extract("Reasoning content<|END_THINKING|>actual response", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle Apertus closing tag", func() {
		thoughts, remainder := Extract("Reasoning content<|inner_suffix|>actual response", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle Seed closing tag", func() {
		thoughts, remainder := Extract("Reasoning content</seed:think>actual response", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle Magistral closing tag", func() {
		thoughts, remainder := Extract("Reasoning content[/THINK]actual response", WithThinkingForcedOpen())

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})
})
|
||||||
|
|
||||||
|
// Specs for the model-specific tag pairs (Command R7B, Apertus, Seed,
// Magistral), each covered in three shapes: a closed pair, an unclosed opening
// tag, and a closing tag with no opening tag.
// NOTE(review): the closed-pair expectations show "Before After" with a single
// space; confirm against Extract's whitespace handling.
Context("with model-specific tag pairs", func() {
	// Closed pairs.

	It("should extract Command R7B reasoning tags", func() {
		thoughts, remainder := Extract("Before <|START_THINKING|>reasoning here<|END_THINKING|> After")

		Expect(thoughts).To(Equal("reasoning here"))
		Expect(remainder).To(Equal("Before After"))
	})

	It("should extract Apertus reasoning tags", func() {
		thoughts, remainder := Extract("Before <|inner_prefix|>reasoning here<|inner_suffix|> After")

		Expect(thoughts).To(Equal("reasoning here"))
		Expect(remainder).To(Equal("Before After"))
	})

	It("should extract Seed reasoning tags", func() {
		thoughts, remainder := Extract("Before <seed:think>reasoning here</seed:think> After")

		Expect(thoughts).To(Equal("reasoning here"))
		Expect(remainder).To(Equal("Before After"))
	})

	It("should extract Magistral reasoning tags", func() {
		thoughts, remainder := Extract("Before [THINK]reasoning here[/THINK] After")

		Expect(thoughts).To(Equal("reasoning here"))
		Expect(remainder).To(Equal("Before After"))
	})

	// Opening tag without a closing tag.

	It("should handle unclosed Command R7B tag", func() {
		thoughts, remainder := Extract("Before <|START_THINKING|>reasoning still streaming")

		Expect(thoughts).To(Equal("reasoning still streaming"))
		Expect(remainder).To(Equal("Before "))
	})

	It("should handle unclosed Apertus tag", func() {
		thoughts, remainder := Extract("Before <|inner_prefix|>reasoning still streaming")

		Expect(thoughts).To(Equal("reasoning still streaming"))
		Expect(remainder).To(Equal("Before "))
	})

	It("should handle unclosed Seed tag", func() {
		thoughts, remainder := Extract("Before <seed:think>reasoning still streaming")

		Expect(thoughts).To(Equal("reasoning still streaming"))
		Expect(remainder).To(Equal("Before "))
	})

	It("should handle unclosed Magistral tag", func() {
		thoughts, remainder := Extract("Before [THINK]reasoning still streaming")

		Expect(thoughts).To(Equal("reasoning still streaming"))
		Expect(remainder).To(Equal("Before "))
	})

	// Closing tag without an opening tag.

	It("should handle closing-only Command R7B tag", func() {
		thoughts, remainder := Extract("Reasoning content<|END_THINKING|>actual response")

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle closing-only Apertus tag", func() {
		thoughts, remainder := Extract("Reasoning content<|inner_suffix|>actual response")

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle closing-only Seed tag", func() {
		thoughts, remainder := Extract("Reasoning content</seed:think>actual response")

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})

	It("should handle closing-only Magistral tag", func() {
		thoughts, remainder := Extract("Reasoning content[/THINK]actual response")

		Expect(thoughts).To(Equal("Reasoning content"))
		Expect(remainder).To(Equal("actual response"))
	})
})
|
||||||
|
})
|
||||||
534
swagger/docs.go
534
swagger/docs.go
@@ -1259,6 +1259,116 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/responses": {
|
||||||
|
"post": {
|
||||||
|
"summary": "Create a response using the Open Responses API",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "Request body",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.OpenResponsesRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/v1/responses/{id}": {
|
||||||
|
"get": {
|
||||||
|
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
|
||||||
|
"summary": "Get a response by ID",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Response ID",
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Set to 'true' to resume streaming",
|
||||||
|
"name": "stream",
|
||||||
|
"in": "query"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Sequence number to resume from (for streaming)",
|
||||||
|
"name": "starting_after",
|
||||||
|
"in": "query"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "Not Found",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/v1/responses/{id}/cancel": {
|
||||||
|
"post": {
|
||||||
|
"description": "Cancel a background response if it's still in progress",
|
||||||
|
"summary": "Cancel a response",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Response ID",
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "Not Found",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/sound-generation": {
|
"/v1/sound-generation": {
|
||||||
"post": {
|
"post": {
|
||||||
"summary": "Generates audio from the input text.",
|
"summary": "Generates audio from the input text.",
|
||||||
@@ -2507,6 +2617,322 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.ORError": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"param": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORFunctionTool": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"strict": {
|
||||||
|
"description": "Always include in response",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "always \"function\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORIncompleteDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reason": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORInputTokensDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cached_tokens": {
|
||||||
|
"description": "Always include, even if 0",
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORItemField": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"arguments": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"call_id": {
|
||||||
|
"description": "Function call fields",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"description": "string or []ORContentPart for messages"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"description": "Present for all output items",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"description": "Function call output fields"
|
||||||
|
},
|
||||||
|
"role": {
|
||||||
|
"description": "Message fields",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"description": "in_progress|completed|incomplete",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "message|function_call|function_call_output|reasoning|item_reference",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.OROutputTokensDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reasoning_tokens": {
|
||||||
|
"description": "Always include, even if 0",
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORReasoning": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"effort": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORReasoningParam": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"effort": {
|
||||||
|
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"description": "\"auto\"|\"concise\"|\"detailed\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORResponseResource": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"background": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"completed_at": {
|
||||||
|
"description": "Required: present as number or null",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"description": "Always present, null if no error",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORError"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"frequency_penalty": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"incomplete_details": {
|
||||||
|
"description": "Always present, null if complete",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORIncompleteDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"instructions": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"max_output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"max_tool_calls": {
|
||||||
|
"description": "nullable",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"description": "Metadata and operational flags",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"description": "always \"response\"",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORItemField"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"parallel_tool_calls": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"presence_penalty": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"previous_response_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"prompt_cache_key": {
|
||||||
|
"description": "nullable",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"description": "nullable",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORReasoning"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"safety_identifier": {
|
||||||
|
"description": "Safety and caching",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"service_tier": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"description": "in_progress|completed|failed|incomplete",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"store": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"description": "Sampling parameters (always required)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"description": "Text format configuration",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORTextConfig"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tool_choice": {},
|
||||||
|
"tools": {
|
||||||
|
"description": "Tool-related fields",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_logprobs": {
|
||||||
|
"description": "Default to 0",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"truncation": {
|
||||||
|
"description": "Truncation and reasoning",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"description": "Usage statistics",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORUsage"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORTextConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"format": {
|
||||||
|
"$ref": "#/definitions/schema.ORTextFormat"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORTextFormat": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"description": "\"text\" or \"json_schema\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORUsage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"input_tokens_details": {
|
||||||
|
"description": "Always present",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORInputTokensDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens_details": {
|
||||||
|
"description": "Always present",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.OROutputTokensDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"total_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.OpenAIModel": {
|
"schema.OpenAIModel": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -2781,6 +3207,114 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.OpenResponsesRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"allowed_tools": {
|
||||||
|
"description": "Restrict which tools can be invoked",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"background": {
|
||||||
|
"description": "Run request in background",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"frequency_penalty": {
|
||||||
|
"description": "Frequency penalty (-2.0 to 2.0)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"include": {
|
||||||
|
"description": "What to include in response",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"description": "string or []ORItemParam"
|
||||||
|
},
|
||||||
|
"instructions": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"logit_bias": {
|
||||||
|
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float64"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"max_output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"max_tool_calls": {
|
||||||
|
"description": "Maximum number of tool calls",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"parallel_tool_calls": {
|
||||||
|
"description": "Allow parallel tool calls",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"presence_penalty": {
|
||||||
|
"description": "Presence penalty (-2.0 to 2.0)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"previous_response_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"$ref": "#/definitions/schema.ORReasoningParam"
|
||||||
|
},
|
||||||
|
"service_tier": {
|
||||||
|
"description": "\"auto\"|\"default\"|priority hint",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"store": {
|
||||||
|
"description": "Whether to store the response",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"stream": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"text_format": {
|
||||||
|
"description": "Additional parameters from spec"
|
||||||
|
},
|
||||||
|
"tool_choice": {
|
||||||
|
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
|
||||||
|
},
|
||||||
|
"tools": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_logprobs": {
|
||||||
|
"description": "Number of top logprobs to return",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"truncation": {
|
||||||
|
"description": "\"auto\"|\"disabled\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.P2PNodesResponse": {
|
"schema.P2PNodesResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|||||||
@@ -1252,6 +1252,116 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/responses": {
|
||||||
|
"post": {
|
||||||
|
"summary": "Create a response using the Open Responses API",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "Request body",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.OpenResponsesRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/v1/responses/{id}": {
|
||||||
|
"get": {
|
||||||
|
"description": "Retrieve a response by ID. Can be used for polling background responses or resuming streaming responses.",
|
||||||
|
"summary": "Get a response by ID",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Response ID",
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Set to 'true' to resume streaming",
|
||||||
|
"name": "stream",
|
||||||
|
"in": "query"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Sequence number to resume from (for streaming)",
|
||||||
|
"name": "starting_after",
|
||||||
|
"in": "query"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "Not Found",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/v1/responses/{id}/cancel": {
|
||||||
|
"post": {
|
||||||
|
"description": "Cancel a background response if it's still in progress",
|
||||||
|
"summary": "Cancel a response",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Response ID",
|
||||||
|
"name": "id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Response",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/schema.ORResponseResource"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"404": {
|
||||||
|
"description": "Not Found",
|
||||||
|
"schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/sound-generation": {
|
"/v1/sound-generation": {
|
||||||
"post": {
|
"post": {
|
||||||
"summary": "Generates audio from the input text.",
|
"summary": "Generates audio from the input text.",
|
||||||
@@ -2500,6 +2610,322 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.ORError": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"param": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "invalid_request|not_found|server_error|model_error|too_many_requests",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORFunctionTool": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": true
|
||||||
|
},
|
||||||
|
"strict": {
|
||||||
|
"description": "Always include in response",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "always \"function\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORIncompleteDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reason": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORInputTokensDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cached_tokens": {
|
||||||
|
"description": "Always include, even if 0",
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORItemField": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"arguments": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"call_id": {
|
||||||
|
"description": "Function call fields",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"description": "string or []ORContentPart for messages"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"description": "Present for all output items",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"description": "Function call output fields"
|
||||||
|
},
|
||||||
|
"role": {
|
||||||
|
"description": "Message fields",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"description": "in_progress|completed|incomplete",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"description": "message|function_call|function_call_output|reasoning|item_reference",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.OROutputTokensDetails": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reasoning_tokens": {
|
||||||
|
"description": "Always include, even if 0",
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORReasoning": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"effort": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORReasoningParam": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"effort": {
|
||||||
|
"description": "\"none\"|\"low\"|\"medium\"|\"high\"|\"xhigh\"",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"description": "\"auto\"|\"concise\"|\"detailed\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORResponseResource": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"background": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"completed_at": {
|
||||||
|
"description": "Required: present as number or null",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"description": "Always present, null if no error",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORError"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"frequency_penalty": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"incomplete_details": {
|
||||||
|
"description": "Always present, null if complete",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORIncompleteDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"instructions": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"max_output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"max_tool_calls": {
|
||||||
|
"description": "nullable",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"description": "Metadata and operational flags",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"description": "always \"response\"",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORItemField"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"parallel_tool_calls": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"presence_penalty": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"previous_response_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"prompt_cache_key": {
|
||||||
|
"description": "nullable",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"description": "nullable",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORReasoning"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"safety_identifier": {
|
||||||
|
"description": "Safety and caching",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"service_tier": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"description": "in_progress|completed|failed|incomplete",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"store": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"description": "Sampling parameters (always required)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"description": "Text format configuration",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORTextConfig"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tool_choice": {},
|
||||||
|
"tools": {
|
||||||
|
"description": "Tool-related fields",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_logprobs": {
|
||||||
|
"description": "Default to 0",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"truncation": {
|
||||||
|
"description": "Truncation and reasoning",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"description": "Usage statistics",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORUsage"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORTextConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"format": {
|
||||||
|
"$ref": "#/definitions/schema.ORTextFormat"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORTextFormat": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"description": "\"text\" or \"json_schema\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"schema.ORUsage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"input_tokens_details": {
|
||||||
|
"description": "Always present",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.ORInputTokensDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens_details": {
|
||||||
|
"description": "Always present",
|
||||||
|
"allOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/schema.OROutputTokensDetails"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"total_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.OpenAIModel": {
|
"schema.OpenAIModel": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -2774,6 +3200,114 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"schema.OpenResponsesRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"allowed_tools": {
|
||||||
|
"description": "Restrict which tools can be invoked",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"background": {
|
||||||
|
"description": "Run request in background",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"frequency_penalty": {
|
||||||
|
"description": "Frequency penalty (-2.0 to 2.0)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"include": {
|
||||||
|
"description": "What to include in response",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"description": "string or []ORItemParam"
|
||||||
|
},
|
||||||
|
"instructions": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"logit_bias": {
|
||||||
|
"description": "OpenAI-compatible extensions (not in Open Responses spec)",
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float64"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"max_output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"max_tool_calls": {
|
||||||
|
"description": "Maximum number of tool calls",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"parallel_tool_calls": {
|
||||||
|
"description": "Allow parallel tool calls",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"presence_penalty": {
|
||||||
|
"description": "Presence penalty (-2.0 to 2.0)",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"previous_response_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"$ref": "#/definitions/schema.ORReasoningParam"
|
||||||
|
},
|
||||||
|
"service_tier": {
|
||||||
|
"description": "\"auto\"|\"default\"|priority hint",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"store": {
|
||||||
|
"description": "Whether to store the response",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"stream": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"text_format": {
|
||||||
|
"description": "Additional parameters from spec"
|
||||||
|
},
|
||||||
|
"tool_choice": {
|
||||||
|
"description": "\"auto\"|\"required\"|\"none\"|{type:\"function\",name:\"...\"}"
|
||||||
|
},
|
||||||
|
"tools": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.ORFunctionTool"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"top_logprobs": {
|
||||||
|
"description": "Number of top logprobs to return",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"truncation": {
|
||||||
|
"description": "\"auto\"|\"disabled\"",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.P2PNodesResponse": {
|
"schema.P2PNodesResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|||||||
@@ -742,6 +742,212 @@ definitions:
|
|||||||
tunnelAddress:
|
tunnelAddress:
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
|
schema.ORError:
|
||||||
|
properties:
|
||||||
|
code:
|
||||||
|
type: string
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
param:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
description: invalid_request|not_found|server_error|model_error|too_many_requests
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORFunctionTool:
|
||||||
|
properties:
|
||||||
|
description:
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
parameters:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
strict:
|
||||||
|
description: Always include in response
|
||||||
|
type: boolean
|
||||||
|
type:
|
||||||
|
description: always "function"
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORIncompleteDetails:
|
||||||
|
properties:
|
||||||
|
reason:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORInputTokensDetails:
|
||||||
|
properties:
|
||||||
|
cached_tokens:
|
||||||
|
description: Always include, even if 0
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
|
schema.ORItemField:
|
||||||
|
properties:
|
||||||
|
arguments:
|
||||||
|
type: string
|
||||||
|
call_id:
|
||||||
|
description: Function call fields
|
||||||
|
type: string
|
||||||
|
content:
|
||||||
|
description: string or []ORContentPart for messages
|
||||||
|
id:
|
||||||
|
description: Present for all output items
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
output:
|
||||||
|
description: Function call output fields
|
||||||
|
role:
|
||||||
|
description: Message fields
|
||||||
|
type: string
|
||||||
|
status:
|
||||||
|
description: in_progress|completed|incomplete
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
description: message|function_call|function_call_output|reasoning|item_reference
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.OROutputTokensDetails:
|
||||||
|
properties:
|
||||||
|
reasoning_tokens:
|
||||||
|
description: Always include, even if 0
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
|
schema.ORReasoning:
|
||||||
|
properties:
|
||||||
|
effort:
|
||||||
|
type: string
|
||||||
|
summary:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORReasoningParam:
|
||||||
|
properties:
|
||||||
|
effort:
|
||||||
|
description: '"none"|"low"|"medium"|"high"|"xhigh"'
|
||||||
|
type: string
|
||||||
|
summary:
|
||||||
|
description: '"auto"|"concise"|"detailed"'
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORResponseResource:
|
||||||
|
properties:
|
||||||
|
background:
|
||||||
|
type: boolean
|
||||||
|
completed_at:
|
||||||
|
description: 'Required: present as number or null'
|
||||||
|
type: integer
|
||||||
|
created_at:
|
||||||
|
type: integer
|
||||||
|
error:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORError'
|
||||||
|
description: Always present, null if no error
|
||||||
|
frequency_penalty:
|
||||||
|
type: number
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
incomplete_details:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORIncompleteDetails'
|
||||||
|
description: Always present, null if complete
|
||||||
|
instructions:
|
||||||
|
type: string
|
||||||
|
max_output_tokens:
|
||||||
|
type: integer
|
||||||
|
max_tool_calls:
|
||||||
|
description: nullable
|
||||||
|
type: integer
|
||||||
|
metadata:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Metadata and operational flags
|
||||||
|
type: object
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
object:
|
||||||
|
description: always "response"
|
||||||
|
type: string
|
||||||
|
output:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.ORItemField'
|
||||||
|
type: array
|
||||||
|
parallel_tool_calls:
|
||||||
|
type: boolean
|
||||||
|
presence_penalty:
|
||||||
|
type: number
|
||||||
|
previous_response_id:
|
||||||
|
type: string
|
||||||
|
prompt_cache_key:
|
||||||
|
description: nullable
|
||||||
|
type: string
|
||||||
|
reasoning:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORReasoning'
|
||||||
|
description: nullable
|
||||||
|
safety_identifier:
|
||||||
|
description: Safety and caching
|
||||||
|
type: string
|
||||||
|
service_tier:
|
||||||
|
type: string
|
||||||
|
status:
|
||||||
|
description: in_progress|completed|failed|incomplete
|
||||||
|
type: string
|
||||||
|
store:
|
||||||
|
type: boolean
|
||||||
|
temperature:
|
||||||
|
description: Sampling parameters (always required)
|
||||||
|
type: number
|
||||||
|
text:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORTextConfig'
|
||||||
|
description: Text format configuration
|
||||||
|
tool_choice: {}
|
||||||
|
tools:
|
||||||
|
description: Tool-related fields
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.ORFunctionTool'
|
||||||
|
type: array
|
||||||
|
top_logprobs:
|
||||||
|
description: Default to 0
|
||||||
|
type: integer
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
truncation:
|
||||||
|
description: Truncation and reasoning
|
||||||
|
type: string
|
||||||
|
usage:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORUsage'
|
||||||
|
description: Usage statistics
|
||||||
|
type: object
|
||||||
|
schema.ORTextConfig:
|
||||||
|
properties:
|
||||||
|
format:
|
||||||
|
$ref: '#/definitions/schema.ORTextFormat'
|
||||||
|
type: object
|
||||||
|
schema.ORTextFormat:
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
description: '"text" or "json_schema"'
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
schema.ORUsage:
|
||||||
|
properties:
|
||||||
|
input_tokens:
|
||||||
|
type: integer
|
||||||
|
input_tokens_details:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.ORInputTokensDetails'
|
||||||
|
description: Always present
|
||||||
|
output_tokens:
|
||||||
|
type: integer
|
||||||
|
output_tokens_details:
|
||||||
|
allOf:
|
||||||
|
- $ref: '#/definitions/schema.OROutputTokensDetails'
|
||||||
|
description: Always present
|
||||||
|
total_tokens:
|
||||||
|
type: integer
|
||||||
|
type: object
|
||||||
schema.OpenAIModel:
|
schema.OpenAIModel:
|
||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
@@ -936,6 +1142,82 @@ definitions:
|
|||||||
total_tokens:
|
total_tokens:
|
||||||
type: integer
|
type: integer
|
||||||
type: object
|
type: object
|
||||||
|
schema.OpenResponsesRequest:
|
||||||
|
properties:
|
||||||
|
allowed_tools:
|
||||||
|
description: Restrict which tools can be invoked
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
background:
|
||||||
|
description: Run request in background
|
||||||
|
type: boolean
|
||||||
|
frequency_penalty:
|
||||||
|
description: Frequency penalty (-2.0 to 2.0)
|
||||||
|
type: number
|
||||||
|
include:
|
||||||
|
description: What to include in response
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
input:
|
||||||
|
description: string or []ORItemParam
|
||||||
|
instructions:
|
||||||
|
type: string
|
||||||
|
logit_bias:
|
||||||
|
additionalProperties:
|
||||||
|
format: float64
|
||||||
|
type: number
|
||||||
|
description: OpenAI-compatible extensions (not in Open Responses spec)
|
||||||
|
type: object
|
||||||
|
max_output_tokens:
|
||||||
|
type: integer
|
||||||
|
max_tool_calls:
|
||||||
|
description: Maximum number of tool calls
|
||||||
|
type: integer
|
||||||
|
metadata:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
parallel_tool_calls:
|
||||||
|
description: Allow parallel tool calls
|
||||||
|
type: boolean
|
||||||
|
presence_penalty:
|
||||||
|
description: Presence penalty (-2.0 to 2.0)
|
||||||
|
type: number
|
||||||
|
previous_response_id:
|
||||||
|
type: string
|
||||||
|
reasoning:
|
||||||
|
$ref: '#/definitions/schema.ORReasoningParam'
|
||||||
|
service_tier:
|
||||||
|
description: '"auto"|"default"|priority hint'
|
||||||
|
type: string
|
||||||
|
store:
|
||||||
|
description: Whether to store the response
|
||||||
|
type: boolean
|
||||||
|
stream:
|
||||||
|
type: boolean
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
text_format:
|
||||||
|
description: Additional parameters from spec
|
||||||
|
tool_choice:
|
||||||
|
description: '"auto"|"required"|"none"|{type:"function",name:"..."}'
|
||||||
|
tools:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.ORFunctionTool'
|
||||||
|
type: array
|
||||||
|
top_logprobs:
|
||||||
|
description: Number of top logprobs to return
|
||||||
|
type: integer
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
truncation:
|
||||||
|
description: '"auto"|"disabled"'
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
schema.P2PNodesResponse:
|
schema.P2PNodesResponse:
|
||||||
properties:
|
properties:
|
||||||
federated_nodes:
|
federated_nodes:
|
||||||
@@ -1962,6 +2244,80 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/schema.JINARerankResponse'
|
$ref: '#/definitions/schema.JINARerankResponse'
|
||||||
summary: Reranks a list of phrases by relevance to a given text query.
|
summary: Reranks a list of phrases by relevance to a given text query.
|
||||||
|
/v1/responses:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: Request body
|
||||||
|
in: body
|
||||||
|
name: request
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.OpenResponsesRequest'
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Response
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.ORResponseResource'
|
||||||
|
summary: Create a response using the Open Responses API
|
||||||
|
/v1/responses/{id}:
|
||||||
|
get:
|
||||||
|
description: Retrieve a response by ID. Can be used for polling background responses
|
||||||
|
or resuming streaming responses.
|
||||||
|
parameters:
|
||||||
|
- description: Response ID
|
||||||
|
in: path
|
||||||
|
name: id
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
- description: Set to 'true' to resume streaming
|
||||||
|
in: query
|
||||||
|
name: stream
|
||||||
|
type: string
|
||||||
|
- description: Sequence number to resume from (for streaming)
|
||||||
|
in: query
|
||||||
|
name: starting_after
|
||||||
|
type: integer
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Response
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.ORResponseResource'
|
||||||
|
"400":
|
||||||
|
description: Bad Request
|
||||||
|
schema:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
"404":
|
||||||
|
description: Not Found
|
||||||
|
schema:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
summary: Get a response by ID
|
||||||
|
/v1/responses/{id}/cancel:
|
||||||
|
post:
|
||||||
|
description: Cancel a background response if it's still in progress
|
||||||
|
parameters:
|
||||||
|
- description: Response ID
|
||||||
|
in: path
|
||||||
|
name: id
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Response
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/schema.ORResponseResource'
|
||||||
|
"400":
|
||||||
|
description: Bad Request
|
||||||
|
schema:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
"404":
|
||||||
|
description: Not Found
|
||||||
|
schema:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
summary: Cancel a response
|
||||||
/v1/sound-generation:
|
/v1/sound-generation:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
|
|||||||
Reference in New Issue
Block a user