feat(swagger): update swagger (#9607)

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
This commit is contained in:
LocalAI [bot]
2026-04-29 00:18:02 +02:00
committed by GitHub
parent e370318bd7
commit 1fe3558ec6
3 changed files with 75 additions and 93 deletions

View File

@@ -842,7 +842,7 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
@@ -850,7 +850,7 @@ const docTemplate = `{
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3044,36 +3044,45 @@ const docTemplate = `{
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
"text": {
"type": "string"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"timestamp": {
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"type": "integer"
},
"gpu_layers": {
"type": "integer"
},
"kv_quant_bits": {
"type": "integer"
},
"model": {
"type": "string"
}
}
},
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3094,21 +3103,6 @@ const docTemplate = `{
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {

View File

@@ -839,7 +839,7 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
@@ -847,7 +847,7 @@
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3041,36 +3041,45 @@
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
"text": {
"type": "string"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"timestamp": {
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"type": "integer"
},
"gpu_layers": {
"type": "integer"
},
"kv_quant_bits": {
"type": "integer"
},
"model": {
"type": "string"
}
}
},
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3091,21 +3100,6 @@
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {

View File

@@ -313,28 +313,32 @@ definitions:
>= 1.
type: integer
type: object
localai.vramEstimateRequest:
model.BackendLogLine:
properties:
context_size:
description: context length to estimate for (default 8192)
type: integer
gpu_layers:
description: number of layers to offload to GPU (0 = all)
type: integer
kv_quant_bits:
description: KV cache quantization bits (0 = fp16)
type: integer
model:
description: model name (must be installed)
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
localai.vramEstimateResponse:
modeladmin.VRAMRequest:
properties:
context_size:
type: integer
gpu_layers:
type: integer
kv_quant_bits:
type: integer
model:
type: string
type: object
modeladmin.VRAMResponse:
properties:
context_note:
description: note when context_size was defaulted
type: string
model_max_context:
description: model's trained maximum context length
type: integer
sizeBytes:
description: total model weight size in bytes
@@ -349,16 +353,6 @@ definitions:
description: human-readable VRAM (e.g. "6.1 GB")
type: string
type: object
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
proto.MemoryUsageData:
properties:
breakdown:
@@ -2710,14 +2704,14 @@ paths:
name: request
required: true
schema:
$ref: '#/definitions/localai.vramEstimateRequest'
$ref: '#/definitions/modeladmin.VRAMRequest'
produces:
- application/json
responses:
"200":
description: VRAM estimate
schema:
$ref: '#/definitions/localai.vramEstimateResponse'
$ref: '#/definitions/modeladmin.VRAMResponse'
summary: Estimate VRAM usage for a model
tags:
- config