mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-29 03:24:49 -04:00
feat(swagger): update swagger (#9607)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
This commit is contained in:
@@ -842,7 +842,7 @@ const docTemplate = `{
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/localai.vramEstimateRequest"
|
||||
"$ref": "#/definitions/modeladmin.VRAMRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
@@ -850,7 +850,7 @@ const docTemplate = `{
|
||||
"200": {
|
||||
"description": "VRAM estimate",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/localai.vramEstimateResponse"
|
||||
"$ref": "#/definitions/modeladmin.VRAMResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3044,36 +3044,45 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"localai.vramEstimateRequest": {
|
||||
"model.BackendLogLine": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_size": {
|
||||
"description": "context length to estimate for (default 8192)",
|
||||
"type": "integer"
|
||||
"stream": {
|
||||
"description": "\"stdout\" or \"stderr\"",
|
||||
"type": "string"
|
||||
},
|
||||
"gpu_layers": {
|
||||
"description": "number of layers to offload to GPU (0 = all)",
|
||||
"type": "integer"
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"kv_quant_bits": {
|
||||
"description": "KV cache quantization bits (0 = fp16)",
|
||||
"type": "integer"
|
||||
},
|
||||
"model": {
|
||||
"description": "model name (must be installed)",
|
||||
"timestamp": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"localai.vramEstimateResponse": {
|
||||
"modeladmin.VRAMRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_size": {
|
||||
"type": "integer"
|
||||
},
|
||||
"gpu_layers": {
|
||||
"type": "integer"
|
||||
},
|
||||
"kv_quant_bits": {
|
||||
"type": "integer"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"modeladmin.VRAMResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_note": {
|
||||
"description": "note when context_size was defaulted",
|
||||
"type": "string"
|
||||
},
|
||||
"model_max_context": {
|
||||
"description": "model's trained maximum context length",
|
||||
"type": "integer"
|
||||
},
|
||||
"sizeBytes": {
|
||||
@@ -3094,21 +3103,6 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"model.BackendLogLine": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"stream": {
|
||||
"description": "\"stdout\" or \"stderr\"",
|
||||
"type": "string"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"proto.MemoryUsageData": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -839,7 +839,7 @@
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/localai.vramEstimateRequest"
|
||||
"$ref": "#/definitions/modeladmin.VRAMRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
@@ -847,7 +847,7 @@
|
||||
"200": {
|
||||
"description": "VRAM estimate",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/localai.vramEstimateResponse"
|
||||
"$ref": "#/definitions/modeladmin.VRAMResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3041,36 +3041,45 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"localai.vramEstimateRequest": {
|
||||
"model.BackendLogLine": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_size": {
|
||||
"description": "context length to estimate for (default 8192)",
|
||||
"type": "integer"
|
||||
"stream": {
|
||||
"description": "\"stdout\" or \"stderr\"",
|
||||
"type": "string"
|
||||
},
|
||||
"gpu_layers": {
|
||||
"description": "number of layers to offload to GPU (0 = all)",
|
||||
"type": "integer"
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"kv_quant_bits": {
|
||||
"description": "KV cache quantization bits (0 = fp16)",
|
||||
"type": "integer"
|
||||
},
|
||||
"model": {
|
||||
"description": "model name (must be installed)",
|
||||
"timestamp": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"localai.vramEstimateResponse": {
|
||||
"modeladmin.VRAMRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_size": {
|
||||
"type": "integer"
|
||||
},
|
||||
"gpu_layers": {
|
||||
"type": "integer"
|
||||
},
|
||||
"kv_quant_bits": {
|
||||
"type": "integer"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"modeladmin.VRAMResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"context_note": {
|
||||
"description": "note when context_size was defaulted",
|
||||
"type": "string"
|
||||
},
|
||||
"model_max_context": {
|
||||
"description": "model's trained maximum context length",
|
||||
"type": "integer"
|
||||
},
|
||||
"sizeBytes": {
|
||||
@@ -3091,21 +3100,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"model.BackendLogLine": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"stream": {
|
||||
"description": "\"stdout\" or \"stderr\"",
|
||||
"type": "string"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"proto.MemoryUsageData": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@@ -313,28 +313,32 @@ definitions:
|
||||
>= 1.
|
||||
type: integer
|
||||
type: object
|
||||
localai.vramEstimateRequest:
|
||||
model.BackendLogLine:
|
||||
properties:
|
||||
context_size:
|
||||
description: context length to estimate for (default 8192)
|
||||
type: integer
|
||||
gpu_layers:
|
||||
description: number of layers to offload to GPU (0 = all)
|
||||
type: integer
|
||||
kv_quant_bits:
|
||||
description: KV cache quantization bits (0 = fp16)
|
||||
type: integer
|
||||
model:
|
||||
description: model name (must be installed)
|
||||
stream:
|
||||
description: '"stdout" or "stderr"'
|
||||
type: string
|
||||
text:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
type: object
|
||||
localai.vramEstimateResponse:
|
||||
modeladmin.VRAMRequest:
|
||||
properties:
|
||||
context_size:
|
||||
type: integer
|
||||
gpu_layers:
|
||||
type: integer
|
||||
kv_quant_bits:
|
||||
type: integer
|
||||
model:
|
||||
type: string
|
||||
type: object
|
||||
modeladmin.VRAMResponse:
|
||||
properties:
|
||||
context_note:
|
||||
description: note when context_size was defaulted
|
||||
type: string
|
||||
model_max_context:
|
||||
description: model's trained maximum context length
|
||||
type: integer
|
||||
sizeBytes:
|
||||
description: total model weight size in bytes
|
||||
@@ -349,16 +353,6 @@ definitions:
|
||||
description: human-readable VRAM (e.g. "6.1 GB")
|
||||
type: string
|
||||
type: object
|
||||
model.BackendLogLine:
|
||||
properties:
|
||||
stream:
|
||||
description: '"stdout" or "stderr"'
|
||||
type: string
|
||||
text:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
type: object
|
||||
proto.MemoryUsageData:
|
||||
properties:
|
||||
breakdown:
|
||||
@@ -2710,14 +2704,14 @@ paths:
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/localai.vramEstimateRequest'
|
||||
$ref: '#/definitions/modeladmin.VRAMRequest'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: VRAM estimate
|
||||
schema:
|
||||
$ref: '#/definitions/localai.vramEstimateResponse'
|
||||
$ref: '#/definitions/modeladmin.VRAMResponse'
|
||||
summary: Estimate VRAM usage for a model
|
||||
tags:
|
||||
- config
|
||||
|
||||
Reference in New Issue
Block a user