diff --git a/swagger/docs.go b/swagger/docs.go index b7f5617a5..f1f3f8ae9 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -907,6 +907,90 @@ const docTemplate = `{ } } }, + "/api/nodes/{id}/max-replicas-per-model": { + "put": { + "tags": [ + "Nodes" + ], + "summary": "Update a node's max replicas per model", + "parameters": [ + { + "type": "string", + "description": "Node ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "New value", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.UpdateMaxReplicasPerModelRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "object", + "additionalProperties": { + "type": "integer" + } + } + }, + "400": { + "description": "value must be \u003e= 1", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "404": { + "description": "node not found", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + }, + "delete": { + "tags": [ + "Nodes" + ], + "summary": "Reset a node's max replicas per model to the worker default", + "parameters": [ + { + "type": "string", + "description": "Node ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "object", + "additionalProperties": { + "type": "boolean" + } + } + }, + "404": { + "description": "node not found", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, "/api/p2p": { "get": { "tags": [ @@ -2951,6 +3035,15 @@ const docTemplate = `{ } } }, + "localai.UpdateMaxReplicasPerModelRequest": { + "type": "object", + "properties": { + "value": { + "description": "Value is the new per-model replica cap on this node. Must be \u003e= 1.", + "type": "integer" + } + } + }, "localai.vramEstimateRequest": { "type": "object", "properties": { diff --git a/swagger/swagger.json b/swagger/swagger.json index 907b9594c..fd2f2fd1a 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -904,6 +904,90 @@ } } }, + "/api/nodes/{id}/max-replicas-per-model": { + "put": { + "tags": [ + "Nodes" + ], + "summary": "Update a node's max replicas per model", + "parameters": [ + { + "type": "string", + "description": "Node ID", + "name": "id", + "in": "path", + "required": true + }, + { + "description": "New value", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/localai.UpdateMaxReplicasPerModelRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "object", + "additionalProperties": { + "type": "integer" + } + } + }, + "400": { + "description": "value must be \u003e= 1", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "404": { + "description": "node not found", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + }, + "delete": { + "tags": [ + "Nodes" + ], + "summary": "Reset a node's max replicas per model to the worker default", + "parameters": [ + { + "type": "string", + "description": "Node ID", + "name": "id", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "type": "object", + "additionalProperties": { + "type": "boolean" + } + } + }, + "404": { + "description": "node not found", + "schema": { + "type": "object", + "additionalProperties": true + } + } + } + } + }, "/api/p2p": { "get": { "tags": [ @@ -2948,6 +3032,15 @@ } } }, + "localai.UpdateMaxReplicasPerModelRequest": { + "type": "object", + "properties": { + "value": { + "description": "Value is the new per-model replica cap on this node. Must be \u003e= 1.", + "type": "integer" + } + } + }, "localai.vramEstimateRequest": { "type": "object", "properties": { diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 2a7f8e4cb..93a6f582e 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -306,6 +306,13 @@ definitions: success: type: boolean type: object + localai.UpdateMaxReplicasPerModelRequest: + properties: + value: + description: Value is the new per-model replica cap on this node. Must be + >= 1. + type: integer + type: object localai.vramEstimateRequest: properties: context_size: @@ -2714,6 +2721,62 @@ paths: summary: Estimate VRAM usage for a model tags: - config + /api/nodes/{id}/max-replicas-per-model: + delete: + parameters: + - description: Node ID + in: path + name: id + required: true + type: string + responses: + "200": + description: OK + schema: + additionalProperties: + type: boolean + type: object + "404": + description: node not found + schema: + additionalProperties: true + type: object + summary: Reset a node's max replicas per model to the worker default + tags: + - Nodes + put: + parameters: + - description: Node ID + in: path + name: id + required: true + type: string + - description: New value + in: body + name: request + required: true + schema: + $ref: '#/definitions/localai.UpdateMaxReplicasPerModelRequest' + responses: + "200": + description: OK + schema: + additionalProperties: + type: integer + type: object + "400": + description: value must be >= 1 + schema: + additionalProperties: true + type: object + "404": + description: node not found + schema: + additionalProperties: true + type: object + summary: Update a node's max replicas per model + tags: + - Nodes /api/p2p: get: responses: