mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-01 13:42:20 -04:00
* feat: add distributed mode (experimental) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix data races, mutexes, transactions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix events and tool stream in agent chat Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * use ginkgo Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(cron): compute correctly time boundaries avoiding re-triggering Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not flood of healthy checks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * do not list obvious backends as text backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * tests fixups Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * refactoring and consolidation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop redundant healthcheck Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * enhancements, refactorings Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
56 lines
2.7 KiB
Go
56 lines
2.7 KiB
Go
package schema
|
|
|
|
// QuantizationJobRequest is the REST API request to start a quantization job.
//
// Model and Backend are always present in the JSON encoding; the remaining
// fields are tagged omitempty and are left out when they hold no value.
type QuantizationJobRequest struct {
	// Model is the HF model name or a local path.
	Model string `json:"model"`

	// Backend names the quantization backend, e.g. "llama-cpp-quantization".
	Backend string `json:"backend"`

	// QuantizationType is the target format: q4_k_m, q5_k_m, q8_0, f16, etc.
	QuantizationType string `json:"quantization_type,omitempty"`

	// ExtraOptions holds additional string key/value options. The accepted
	// keys are not defined in this file.
	ExtraOptions map[string]string `json:"extra_options,omitempty"`
}
|
|
|
|
// QuantizationJob represents a quantization job with its current state.
|
|
type QuantizationJob struct {
|
|
ID string `json:"id"`
|
|
UserID string `json:"user_id,omitempty"`
|
|
Model string `json:"model"`
|
|
Backend string `json:"backend"`
|
|
ModelID string `json:"model_id,omitempty"`
|
|
QuantizationType string `json:"quantization_type"`
|
|
Status string `json:"status"` // queued, downloading, converting, quantizing, completed, failed, stopped
|
|
Message string `json:"message,omitempty"`
|
|
OutputDir string `json:"output_dir"`
|
|
OutputFile string `json:"output_file,omitempty"` // path to final GGUF
|
|
ExtraOptions map[string]string `json:"extra_options,omitempty"`
|
|
CreatedAt string `json:"created_at"`
|
|
|
|
// Import state (tracked separately from quantization status)
|
|
ImportStatus string `json:"import_status,omitempty"` // "", "importing", "completed", "failed"
|
|
ImportMessage string `json:"import_message,omitempty"`
|
|
ImportModelName string `json:"import_model_name,omitempty"` // registered model name after import
|
|
|
|
// Full config for reuse
|
|
Config *QuantizationJobRequest `json:"config,omitempty"`
|
|
}
|
|
|
|
// QuantizationJobResponse is the REST API response when creating a job.
//
// All three fields are always present in the JSON encoding, even when empty.
type QuantizationJobResponse struct {
	// ID identifies the job.
	ID string `json:"id"`

	// Status is the job status string.
	Status string `json:"status"`

	// Message is a free-form message accompanying the response.
	Message string `json:"message"`
}
|
|
|
|
// QuantizationProgressEvent is an SSE event for quantization progress.
//
// JobID, ProgressPercent and Status are always serialized; the remaining
// fields are tagged omitempty.
type QuantizationProgressEvent struct {
	// JobID identifies the job this event belongs to.
	JobID string `json:"job_id"`

	// ProgressPercent is the reported progress.
	// NOTE(review): presumably in the range 0-100; confirm against the producer.
	ProgressPercent float32 `json:"progress_percent"`

	// Status is the job status string at the time of the event.
	Status string `json:"status"`

	// Message is an optional free-form message.
	Message string `json:"message,omitempty"`

	// OutputFile is an optional output file path.
	OutputFile string `json:"output_file,omitempty"`

	// ExtraMetrics holds additional named numeric values. The set of metric
	// names is not defined in this file.
	ExtraMetrics map[string]float32 `json:"extra_metrics,omitempty"`
}
|
|
|
|
// QuantizationImportRequest is the REST API request to import a quantized model.
type QuantizationImportRequest struct {
	// Name is the model name for LocalAI (auto-generated if empty). It is
	// omitted from the JSON encoding when empty.
	Name string `json:"name,omitempty"`
}
|