mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-12 10:47:23 -04:00
Add a routing middleware stack and a cloud-proxy backend. * cloud-proxy: a Go gRPC backend that forwards OpenAI- and Anthropic-shaped chat requests to upstream providers, with an optional translate mode (OpenAI request -> Anthropic /v1/messages -> OpenAI response) and full tool-calling support. * routing: admission control, content-aware model routing (embedding cache + classifier + rerank + Arch-Router score), PII detection/redaction (regex + NER) with streaming filter and OpenAI/Anthropic adapters, and a per-user/per-key billing recorder backed by GORM or in-memory storage. * middleware: UsageMiddleware records usage via the billing recorder, plus admission, route-model, usage-stamp and trace middlewares. * observability: BackendTrace ring buffer stores full request bodies (capped), MITM proxy emits structured trace events, and router classifier decisions surface at /api/router/decide. * gallery: Arch-Router-1.5B (Q4_K_M and Q8_0). * UI: cloud-proxy model-editor fields, classifier system-prompt and score-normalization config, and a Traces page rendering request bodies. Assisted-by: claude-code:claude-opus-4-7 [Read] [Edit] [Bash] Signed-off-by: Richard Palethorpe <io@richiejp.com>
91 lines
3.3 KiB
Go
91 lines
3.3 KiB
Go
package meta
|
|
|
|
// Dynamic autocomplete provider constants.
//
// Each constant is the identifier of an autocomplete provider whose
// candidate values are not embedded in the field metadata and instead
// require a runtime lookup.
//
// NOTE(review): the "models:<suffix>" identifiers presumably narrow the
// model list to a single usecase (chat, tts, transcript, vad) — confirm
// against the code that resolves these providers.
const (
	ProviderBackends         = "backends"
	ProviderModels           = "models"
	ProviderModelsChat       = "models:chat"
	ProviderModelsTTS        = "models:tts"
	ProviderModelsTranscript = "models:transcript"
	ProviderModelsVAD        = "models:vad"
)
|
|
|
|
// Static option lists embedded directly in field metadata.
|
|
|
|
var QuantizationOptions = []FieldOption{
|
|
{Value: "q4_0", Label: "Q4_0"},
|
|
{Value: "q4_1", Label: "Q4_1"},
|
|
{Value: "q5_0", Label: "Q5_0"},
|
|
{Value: "q5_1", Label: "Q5_1"},
|
|
{Value: "q8_0", Label: "Q8_0"},
|
|
{Value: "q2_K", Label: "Q2_K"},
|
|
{Value: "q3_K_S", Label: "Q3_K_S"},
|
|
{Value: "q3_K_M", Label: "Q3_K_M"},
|
|
{Value: "q3_K_L", Label: "Q3_K_L"},
|
|
{Value: "q4_K_S", Label: "Q4_K_S"},
|
|
{Value: "q4_K_M", Label: "Q4_K_M"},
|
|
{Value: "q5_K_S", Label: "Q5_K_S"},
|
|
{Value: "q5_K_M", Label: "Q5_K_M"},
|
|
{Value: "q6_K", Label: "Q6_K"},
|
|
}
|
|
|
|
var CacheTypeOptions = []FieldOption{
|
|
{Value: "f16", Label: "F16"},
|
|
{Value: "f32", Label: "F32"},
|
|
{Value: "q8_0", Label: "Q8_0"},
|
|
{Value: "q4_0", Label: "Q4_0"},
|
|
{Value: "q4_1", Label: "Q4_1"},
|
|
{Value: "q5_0", Label: "Q5_0"},
|
|
{Value: "q5_1", Label: "Q5_1"},
|
|
}
|
|
|
|
var DiffusersPipelineOptions = []FieldOption{
|
|
{Value: "StableDiffusionPipeline", Label: "StableDiffusionPipeline"},
|
|
{Value: "StableDiffusionImg2ImgPipeline", Label: "StableDiffusionImg2ImgPipeline"},
|
|
{Value: "StableDiffusionXLPipeline", Label: "StableDiffusionXLPipeline"},
|
|
{Value: "StableDiffusionXLImg2ImgPipeline", Label: "StableDiffusionXLImg2ImgPipeline"},
|
|
{Value: "StableDiffusionDepth2ImgPipeline", Label: "StableDiffusionDepth2ImgPipeline"},
|
|
{Value: "DiffusionPipeline", Label: "DiffusionPipeline"},
|
|
{Value: "StableVideoDiffusionPipeline", Label: "StableVideoDiffusionPipeline"},
|
|
}
|
|
|
|
// UsecaseOptions must stay in sync with GetAllModelConfigUsecases in
|
|
// core/config/model_config.go — a value missing here is silently
|
|
// inaccessible from the model editor, which is how `score` (the router
|
|
// classifier usecase) hid for an entire release.
|
|
var UsecaseOptions = []FieldOption{
|
|
{Value: "chat", Label: "Chat"},
|
|
{Value: "completion", Label: "Completion"},
|
|
{Value: "edit", Label: "Edit"},
|
|
{Value: "embeddings", Label: "Embeddings"},
|
|
{Value: "rerank", Label: "Rerank"},
|
|
{Value: "score", Label: "Score (Router Classifier)"},
|
|
{Value: "image", Label: "Image"},
|
|
{Value: "vision", Label: "Vision"},
|
|
{Value: "detection", Label: "Detection"},
|
|
{Value: "face_recognition", Label: "Face Recognition"},
|
|
{Value: "transcript", Label: "Transcript"},
|
|
{Value: "diarization", Label: "Diarization"},
|
|
{Value: "speaker_recognition", Label: "Speaker Recognition"},
|
|
{Value: "tts", Label: "TTS"},
|
|
{Value: "sound_generation", Label: "Sound Generation"},
|
|
{Value: "audio_transform", Label: "Audio Transform"},
|
|
{Value: "realtime_audio", Label: "Realtime Audio"},
|
|
{Value: "tokenize", Label: "Tokenize"},
|
|
{Value: "vad", Label: "VAD"},
|
|
{Value: "video", Label: "Video"},
|
|
}
|
|
|
|
var DiffusersSchedulerOptions = []FieldOption{
|
|
{Value: "ddim", Label: "DDIM"},
|
|
{Value: "ddpm", Label: "DDPM"},
|
|
{Value: "pndm", Label: "PNDM"},
|
|
{Value: "lms", Label: "LMS"},
|
|
{Value: "euler", Label: "Euler"},
|
|
{Value: "euler_a", Label: "Euler A"},
|
|
{Value: "dpm_multistep", Label: "DPM Multistep"},
|
|
{Value: "dpm_singlestep", Label: "DPM Singlestep"},
|
|
{Value: "heun", Label: "Heun"},
|
|
{Value: "unipc", Label: "UniPC"},
|
|
}
|