feat(swagger): update swagger (#9660)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
This commit is contained in:
LocalAI [bot]
2026-05-05 01:50:17 +02:00
committed by GitHub
parent 4ef45bbccd
commit a91b05907c
3 changed files with 346 additions and 0 deletions

View File

@@ -1156,6 +1156,130 @@ const docTemplate = `{
}
}
},
"/audio/transform": {
"post": {
"description": "Runs an audio-in / audio-out transform conditioned on an optional auxiliary reference signal. Concrete transforms include AEC + noise suppression + dereverberation (LocalVQE), voice conversion (reference = target speaker), and pitch shifting. The backend determines the operation; pass model-specific tuning via repeated ` + "`" + `params[\u003ckey\u003e]=\u003cvalue\u003e` + "`" + ` form fields.",
"consumes": [
"multipart/form-data"
],
"produces": [
"audio/x-wav"
],
"tags": [
"audio"
],
"summary": "Transform audio (echo cancellation, noise suppression, voice conversion, etc.)",
"parameters": [
{
"type": "string",
"description": "model",
"name": "model",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "primary input audio file",
"name": "audio",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "auxiliary reference audio (loopback for AEC, target voice for conversion, etc.)",
"name": "reference",
"in": "formData"
},
{
"type": "string",
"description": "wav | mp3 | ogg | flac",
"name": "response_format",
"in": "formData"
},
{
"type": "integer",
"description": "desired output sample rate",
"name": "sample_rate",
"in": "formData"
}
],
"responses": {
"200": {
"description": "transformed audio file",
"schema": {
"type": "string"
}
}
}
}
},
"/audio/transformations": {
"post": {
"description": "Runs an audio-in / audio-out transform conditioned on an optional auxiliary reference signal. Concrete transforms include AEC + noise suppression + dereverberation (LocalVQE), voice conversion (reference = target speaker), and pitch shifting. The backend determines the operation; pass model-specific tuning via repeated ` + "`" + `params[\u003ckey\u003e]=\u003cvalue\u003e` + "`" + ` form fields.",
"consumes": [
"multipart/form-data"
],
"produces": [
"audio/x-wav"
],
"tags": [
"audio"
],
"summary": "Transform audio (echo cancellation, noise suppression, voice conversion, etc.)",
"parameters": [
{
"type": "string",
"description": "model",
"name": "model",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "primary input audio file",
"name": "audio",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "auxiliary reference audio (loopback for AEC, target voice for conversion, etc.)",
"name": "reference",
"in": "formData"
},
{
"type": "string",
"description": "wav | mp3 | ogg | flac",
"name": "response_format",
"in": "formData"
},
{
"type": "integer",
"description": "desired output sample rate",
"name": "sample_rate",
"in": "formData"
}
],
"responses": {
"200": {
"description": "transformed audio file",
"schema": {
"type": "string"
}
}
}
}
},
"/audio/transformations/stream": {
"get": {
"description": "Streams binary PCM frames in (interleaved stereo: ch0=audio, ch1=reference) and out (mono). The first message must be a JSON ` + "`" + `session.update` + "`" + ` envelope describing model + sample format + frame size + backend params. Server emits binary PCM on the same cadence.",
"tags": [
"audio"
],
"summary": "Bidirectional realtime audio transform over WebSocket.",
"responses": {}
}
},
"/backend/monitor": {
"get": {
"tags": [

View File

@@ -1153,6 +1153,130 @@
}
}
},
"/audio/transform": {
"post": {
"description": "Runs an audio-in / audio-out transform conditioned on an optional auxiliary reference signal. Concrete transforms include AEC + noise suppression + dereverberation (LocalVQE), voice conversion (reference = target speaker), and pitch shifting. The backend determines the operation; pass model-specific tuning via repeated `params[\u003ckey\u003e]=\u003cvalue\u003e` form fields.",
"consumes": [
"multipart/form-data"
],
"produces": [
"audio/x-wav"
],
"tags": [
"audio"
],
"summary": "Transform audio (echo cancellation, noise suppression, voice conversion, etc.)",
"parameters": [
{
"type": "string",
"description": "model",
"name": "model",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "primary input audio file",
"name": "audio",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "auxiliary reference audio (loopback for AEC, target voice for conversion, etc.)",
"name": "reference",
"in": "formData"
},
{
"type": "string",
"description": "wav | mp3 | ogg | flac",
"name": "response_format",
"in": "formData"
},
{
"type": "integer",
"description": "desired output sample rate",
"name": "sample_rate",
"in": "formData"
}
],
"responses": {
"200": {
"description": "transformed audio file",
"schema": {
"type": "string"
}
}
}
}
},
"/audio/transformations": {
"post": {
"description": "Runs an audio-in / audio-out transform conditioned on an optional auxiliary reference signal. Concrete transforms include AEC + noise suppression + dereverberation (LocalVQE), voice conversion (reference = target speaker), and pitch shifting. The backend determines the operation; pass model-specific tuning via repeated `params[\u003ckey\u003e]=\u003cvalue\u003e` form fields.",
"consumes": [
"multipart/form-data"
],
"produces": [
"audio/x-wav"
],
"tags": [
"audio"
],
"summary": "Transform audio (echo cancellation, noise suppression, voice conversion, etc.)",
"parameters": [
{
"type": "string",
"description": "model",
"name": "model",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "primary input audio file",
"name": "audio",
"in": "formData",
"required": true
},
{
"type": "file",
"description": "auxiliary reference audio (loopback for AEC, target voice for conversion, etc.)",
"name": "reference",
"in": "formData"
},
{
"type": "string",
"description": "wav | mp3 | ogg | flac",
"name": "response_format",
"in": "formData"
},
{
"type": "integer",
"description": "desired output sample rate",
"name": "sample_rate",
"in": "formData"
}
],
"responses": {
"200": {
"description": "transformed audio file",
"schema": {
"type": "string"
}
}
}
}
},
"/audio/transformations/stream": {
"get": {
"description": "Streams binary PCM frames in (interleaved stereo: ch0=audio, ch1=reference) and out (mono). The first message must be a JSON `session.update` envelope describing model + sample format + frame size + backend params. Server emits binary PCM on the same cadence.",
"tags": [
"audio"
],
"summary": "Bidirectional realtime audio transform over WebSocket.",
"responses": {}
}
},
"/backend/monitor": {
"get": {
"tags": [

View File

@@ -2896,6 +2896,104 @@ paths:
summary: Clear API traces
tags:
- monitoring
/audio/transform:
post:
consumes:
- multipart/form-data
description: Runs an audio-in / audio-out transform conditioned on an optional
auxiliary reference signal. Concrete transforms include AEC + noise suppression
+ dereverberation (LocalVQE), voice conversion (reference = target speaker),
and pitch shifting. The backend determines the operation; pass model-specific
tuning via repeated `params[<key>]=<value>` form fields.
parameters:
- description: model
in: formData
name: model
required: true
type: string
- description: primary input audio file
in: formData
name: audio
required: true
type: file
- description: auxiliary reference audio (loopback for AEC, target voice for
conversion, etc.)
in: formData
name: reference
type: file
- description: wav | mp3 | ogg | flac
in: formData
name: response_format
type: string
- description: desired output sample rate
in: formData
name: sample_rate
type: integer
produces:
- audio/x-wav
responses:
"200":
description: transformed audio file
schema:
type: string
summary: Transform audio (echo cancellation, noise suppression, voice conversion,
etc.)
tags:
- audio
/audio/transformations:
post:
consumes:
- multipart/form-data
description: Runs an audio-in / audio-out transform conditioned on an optional
auxiliary reference signal. Concrete transforms include AEC + noise suppression
+ dereverberation (LocalVQE), voice conversion (reference = target speaker),
and pitch shifting. The backend determines the operation; pass model-specific
tuning via repeated `params[<key>]=<value>` form fields.
parameters:
- description: model
in: formData
name: model
required: true
type: string
- description: primary input audio file
in: formData
name: audio
required: true
type: file
- description: auxiliary reference audio (loopback for AEC, target voice for
conversion, etc.)
in: formData
name: reference
type: file
- description: wav | mp3 | ogg | flac
in: formData
name: response_format
type: string
- description: desired output sample rate
in: formData
name: sample_rate
type: integer
produces:
- audio/x-wav
responses:
"200":
description: transformed audio file
schema:
type: string
summary: Transform audio (echo cancellation, noise suppression, voice conversion,
etc.)
tags:
- audio
/audio/transformations/stream:
get:
description: 'Streams binary PCM frames in (interleaved stereo: ch0=audio, ch1=reference)
and out (mono). The first message must be a JSON `session.update` envelope
describing model + sample format + frame size + backend params. Server emits
binary PCM on the same cadence.'
responses: {}
summary: Bidirectional realtime audio transform over WebSocket.
tags:
- audio
/backend/monitor:
get:
parameters: