diff --git a/core/cli/run.go b/core/cli/run.go index 7b0450d3b..65cb07c0f 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -30,6 +30,8 @@ type RunCMD struct { ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"` ExternalBackends []string `env:"LOCALAI_EXTERNAL_BACKENDS,EXTERNAL_BACKENDS" help:"A list of external backends to load from gallery on boot" group:"backends"` + WebRTCNAT1To1IPs []string `env:"LOCALAI_WEBRTC_NAT_1TO1_IPS,WEBRTC_NAT_1TO1_IPS" help:"IPs advertised as the host ICE candidates for /v1/realtime WebRTC instead of every local interface. Set to the reachable host/LAN IP when running under Docker host networking or NAT, where pion otherwise offers unreachable bridge addresses and the connection drops after ICE consent checks fail." group:"api"` + WebRTCICEInterfaces []string `env:"LOCALAI_WEBRTC_ICE_INTERFACES,WEBRTC_ICE_INTERFACES" help:"Restrict /v1/realtime WebRTC ICE candidate gathering to these network interfaces (e.g. eth0), filtering out docker0/veth noise." group:"api"` BackendsPath string `env:"LOCALAI_BACKENDS_PATH,BACKENDS_PATH" type:"path" default:"${basepath}/backends" help:"Path containing backends used for inferencing" group:"backends"` BackendsSystemPath string `env:"LOCALAI_BACKENDS_SYSTEM_PATH,BACKEND_SYSTEM_PATH" type:"path" default:"/var/lib/local-ai/backends" help:"Path containing system backends used for inferencing" group:"backends"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` @@ -225,6 +227,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithApiKeys(r.APIKeys), config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithExternalBackends(r.ExternalBackends...), + config.WithWebRTCNAT1To1IPs(r.WebRTCNAT1To1IPs...), + config.WithWebRTCICEInterfaces(r.WebRTCICEInterfaces...), config.WithOpaqueErrors(r.OpaqueErrors), config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), config.WithSubtleKeyComparison(r.UseSubtleKeyComparison), diff --git a/core/config/application_config.go b/core/config/application_config.go index dd36b97b9..1e1bacc2c 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -12,10 +12,19 @@ import ( ) type ApplicationConfig struct { - Context context.Context - ConfigFile string - SystemState *system.SystemState - ExternalBackends []string + Context context.Context + ConfigFile string + SystemState *system.SystemState + ExternalBackends []string + + // WebRTCNAT1To1IPs, when set, are advertised as the host ICE candidates for + // /v1/realtime WebRTC instead of every local interface address. Needed when + // the routable address differs from what pion gathers — e.g. Docker host + // networking (where pion also offers unreachable bridge IPs) or NAT. + WebRTCNAT1To1IPs []string + // WebRTCICEInterfaces, when set, restricts ICE candidate gathering to these + // network interfaces (e.g. eth0), filtering out docker0/veth noise. + WebRTCICEInterfaces []string UploadLimitMB, Threads, ContextSize int F16 bool Debug bool @@ -81,7 +90,6 @@ type ApplicationConfig struct { // file is mode 0600. MITMCADir string - // PIIPatternOverrides applies persisted per-id deltas (action, // disabled) to the live redactor at startup. Loaded from // runtime_settings.json and applied right after pii.NewRedactor. @@ -116,11 +124,11 @@ type ApplicationConfig struct { // --require-backend-integrity / LOCALAI_REQUIRE_BACKEND_INTEGRITY. RequireBackendIntegrity bool - SingleBackend bool // Deprecated: use MaxActiveBackends = 1 instead - MaxActiveBackends int // Maximum number of active backends (0 = unlimited, 1 = single backend mode) - WatchDogIdle bool - WatchDogBusy bool - WatchDog bool + SingleBackend bool // Deprecated: use MaxActiveBackends = 1 instead + MaxActiveBackends int // Maximum number of active backends (0 = unlimited, 1 = single backend mode) + WatchDogIdle bool + WatchDogBusy bool + WatchDog bool // Memory Reclaimer settings (works with GPU if available, otherwise RAM) MemoryReclaimerEnabled bool // Enable memory threshold monitoring @@ -311,6 +319,18 @@ func WithExternalBackends(backends ...string) AppOption { } } +func WithWebRTCNAT1To1IPs(ips ...string) AppOption { + return func(o *ApplicationConfig) { + o.WebRTCNAT1To1IPs = ips + } +} + +func WithWebRTCICEInterfaces(interfaces ...string) AppOption { + return func(o *ApplicationConfig) { + o.WebRTCICEInterfaces = interfaces + } +} + func WithMachineTag(tag string) AppOption { return func(o *ApplicationConfig) { o.MachineTag = tag @@ -702,7 +722,6 @@ func WithMITMCADir(dir string) AppOption { } } - func WithDynamicConfigDir(dynamicConfigsDir string) AppOption { return func(o *ApplicationConfig) { o.DynamicConfigsDir = dynamicConfigsDir diff --git a/core/http/endpoints/openai/realtime_webrtc.go b/core/http/endpoints/openai/realtime_webrtc.go index fd305799c..0ac982c19 100644 --- a/core/http/endpoints/openai/realtime_webrtc.go +++ b/core/http/endpoints/openai/realtime_webrtc.go @@ -48,7 +48,8 @@ func RealtimeCalls(application *application.Application) echo.HandlerFunc { return c.JSON(http.StatusInternalServerError, map[string]string{"error": "codec registration failed"}) } - api := webrtc.NewAPI(webrtc.WithMediaEngine(m)) + se := webRTCSettingEngine(application.ApplicationConfig()) + api := webrtc.NewAPI(webrtc.WithMediaEngine(m), webrtc.WithSettingEngine(se)) pc, err := api.NewPeerConnection(webrtc.Configuration{}) if err != nil { diff --git a/core/http/endpoints/openai/realtime_webrtc_ice.go b/core/http/endpoints/openai/realtime_webrtc_ice.go new file mode 100644 index 000000000..82e106c73 --- /dev/null +++ b/core/http/endpoints/openai/realtime_webrtc_ice.go @@ -0,0 +1,47 @@ +package openai + +import ( + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/xlog" + "github.com/pion/webrtc/v4" +) + +// webRTCSettingEngine builds the pion SettingEngine for /v1/realtime WebRTC. +// +// With a default (empty) SettingEngine, pion gathers a host ICE candidate for +// every local interface. Under Docker host networking that includes bridge +// addresses (docker0/veth, 172.x) that a remote browser cannot route to; the +// connection often establishes on a good pair and then drops once ICE consent +// checks fail on the unreachable ones. The two opt-in knobs below let an +// operator advertise only the reachable address. +func webRTCSettingEngine(cfg *config.ApplicationConfig) webrtc.SettingEngine { + s := webrtc.SettingEngine{} + if cfg == nil { + return s + } + if len(cfg.WebRTCNAT1To1IPs) > 0 { + s.SetNAT1To1IPs(cfg.WebRTCNAT1To1IPs, webrtc.ICECandidateTypeHost) + xlog.Debug("realtime webrtc: advertising NAT 1:1 host IPs", "ips", cfg.WebRTCNAT1To1IPs) + } + if filter := iceInterfaceFilter(cfg.WebRTCICEInterfaces); filter != nil { + s.SetInterfaceFilter(filter) + xlog.Debug("realtime webrtc: restricting ICE interfaces", "interfaces", cfg.WebRTCICEInterfaces) + } + return s +} + +// iceInterfaceFilter returns an interface allow-list predicate for pion, or nil +// when no interfaces are configured (pion's default: gather from all). +func iceInterfaceFilter(allowed []string) func(string) bool { + if len(allowed) == 0 { + return nil + } + set := make(map[string]struct{}, len(allowed)) + for _, name := range allowed { + set[name] = struct{}{} + } + return func(iface string) bool { + _, ok := set[iface] + return ok + } +} diff --git a/core/http/endpoints/openai/realtime_webrtc_ice_test.go b/core/http/endpoints/openai/realtime_webrtc_ice_test.go new file mode 100644 index 000000000..fbda610cb --- /dev/null +++ b/core/http/endpoints/openai/realtime_webrtc_ice_test.go @@ -0,0 +1,39 @@ +package openai + +import ( + "github.com/mudler/LocalAI/core/config" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("webRTC ICE settings", func() { + Describe("iceInterfaceFilter", func() { + It("returns nil when no interfaces are configured", func() { + Expect(iceInterfaceFilter(nil)).To(BeNil()) + Expect(iceInterfaceFilter([]string{})).To(BeNil()) + }) + + It("admits only the configured interfaces", func() { + f := iceInterfaceFilter([]string{"eth0", "wlan0"}) + Expect(f).NotTo(BeNil()) + Expect(f("eth0")).To(BeTrue()) + Expect(f("wlan0")).To(BeTrue()) + Expect(f("docker0")).To(BeFalse()) + Expect(f("veth123")).To(BeFalse()) + }) + }) + + Describe("webRTCSettingEngine", func() { + It("does not panic on a nil config", func() { + Expect(func() { webRTCSettingEngine(nil) }).NotTo(Panic()) + }) + + It("builds an engine with NAT 1:1 IPs and an interface filter configured", func() { + cfg := &config.ApplicationConfig{ + WebRTCNAT1To1IPs: []string{"192.168.1.10"}, + WebRTCICEInterfaces: []string{"eth0"}, + } + Expect(func() { webRTCSettingEngine(cfg) }).NotTo(Panic()) + }) + }) +}) diff --git a/docs/content/features/openai-realtime.md b/docs/content/features/openai-realtime.md index 8dba6d419..e76ad4791 100644 --- a/docs/content/features/openai-realtime.md +++ b/docs/content/features/openai-realtime.md @@ -74,6 +74,28 @@ EXTERNAL_GRPC_BACKENDS=opus:/path/to/backend/go/opus/opus The opus backend is loaded automatically when a WebRTC session starts. It does not require any model configuration file — just the backend binary. +#### WebRTC behind Docker host networking or NAT + +By default pion gathers a host ICE candidate for every local interface. Under +Docker **host networking** that includes bridge addresses (`docker0`/`veth`, +`172.x`) that a remote browser cannot route to: the call typically connects on a +good candidate and then drops a few seconds later when ICE consent checks fail on +the unreachable ones. Two settings let you advertise only the reachable address: + +```bash +# Advertise these IPs as the host ICE candidates (e.g. the host's LAN IP) +LOCALAI_WEBRTC_NAT_1TO1_IPS=192.168.1.10 + +# ...or restrict ICE gathering to specific interfaces +LOCALAI_WEBRTC_ICE_INTERFACES=eth0 +``` + +{{% notice tip %}} +For a browser on another LAN machine talking to LocalAI in a host-networked +container, set `LOCALAI_WEBRTC_NAT_1TO1_IPS` to the host's LAN IP. This is the +most reliable fix for WebRTC connections that establish and then drop. +{{% /notice %}} + ## Protocol The API follows the OpenAI Realtime API protocol for handling sessions, audio buffers, and conversation items.