diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index 36fca833..b1e34be3 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -139,33 +139,41 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re return ""; } -/// Ollama's local catalog is authoritative and cheap to query, unlike the -/// cloud providers whose `/models` listings can lag actual availability — so -/// only for Ollama do we confirm the model is installed up front instead of -/// letting the first request fail mid-turn (its default model may not be -/// pulled either). Returns false when a one-shot run should abort; a REPL -/// session only warns, since `/model` can still fix it interactively. -fn verifyOllamaModelInstalled( +const OllamaModel = union(enum) { + /// Owned by the allocator passed to reconcileOllamaModel. + use: []u8, + abort, +}; + +/// Only Ollama: its local catalog is authoritative and cheap, unlike cloud +/// `/models` listings that can lag real availability. A non-explicit (default) +/// model that isn't installed is swapped for an installed one; an explicit one +/// warns, and aborts a one-shot run. +fn reconcileOllamaModel( allocator: std.mem.Allocator, llm: Credentials, - model: []const u8, + desired: []const u8, base_url: ?[:0]const u8, + explicit: bool, is_one_shot: bool, -) bool { - if (llm.provider != .ollama or model.len == 0) return true; - +) !OllamaModel { var arena: std.heap.ArenaAllocator = .init(allocator); defer arena.deinit(); - // Server unreachable or empty catalog: don't block on what we can't confirm. - const ids = zenai.provider.listChatModelIds(allocator, arena.allocator(), .ollama, llm.key, base_url) catch return true; - if (ids.len == 0 or containsString(ids, model)) return true; - - const installed = std.mem.join(arena.allocator(), ", ", ids) catch return true; - std.debug.print( - "Model '{s}' is not installed in Ollama.\nInstalled: {s}\nRun `ollama pull {s}` to install it, or choose one of the above.\n", - .{ model, installed, model }, - ); - return !is_one_shot; + // Unreachable server → empty list → fall through and use `desired` unchecked. + const ids: []const []const u8 = zenai.provider.listChatModelIds(allocator, arena.allocator(), .ollama, llm.key, base_url) catch &.{}; + if (ids.len != 0 and !containsString(ids, desired)) { + if (!explicit) { + std.debug.print("Default Ollama model '{s}' is not installed; using '{s}'.\n", .{ desired, ids[0] }); + return .{ .use = try allocator.dupe(u8, ids[0]) }; + } + const installed = std.mem.join(arena.allocator(), ", ", ids) catch ""; + std.debug.print( + "Model '{s}' is not installed in Ollama.\nInstalled: {s}\nRun `ollama pull {s}` to install it, or choose one of the above.\n", + .{ desired, installed, desired }, + ); + if (is_one_shot) return .abort; + } + return .{ .use = try allocator.dupe(u8, desired) }; } pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent { @@ -213,11 +221,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent // interactive "Select a provider" prompt. On error paths (missing key / // no key detected) resolveCredentials prints its own message and the // banner is skipped. - if (will_repl and (!resolve or settings.wouldResolve(opts, remembered))) { + if (will_repl and (!resolve or settings.wouldResolve(allocator, opts, remembered))) { std.debug.print(Terminal.ansi.bold ++ "\n Lightpanda Agent" ++ Terminal.ansi.reset ++ " " ++ Terminal.ansi.dim ++ "({s})" ++ Terminal.ansi.reset ++ "\n", .{lp.build_config.version}); } - const resolved: ?settings.ResolvedProvider = if (resolve) try settings.resolveCredentials(opts, remembered, will_repl) else null; + const resolved: ?settings.ResolvedProvider = if (resolve) try settings.resolveCredentials(allocator, opts, remembered, will_repl) else null; const llm: ?Credentials = if (resolved) |r| r.credentials else null; if (llm == null and requires_llm) { @@ -227,14 +235,20 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent return error.MissingProvider; } - const model = try allocator.dupe(u8, resolveModelName(opts, resolved, remembered)); + var model = try allocator.dupe(u8, resolveModelName(opts, resolved, remembered)); errdefer allocator.free(model); - if (llm) |l| { - if (!verifyOllamaModelInstalled(allocator, l, model, opts.base_url, is_one_shot)) { - return error.ModelNotInstalled; + if (llm) |l| if (l.provider == .ollama) { + const explicit = opts.model != null or + (remembered != null and remembered.?.provider == .ollama); + switch (try reconcileOllamaModel(allocator, l, model, opts.base_url, explicit, is_one_shot)) { + .use => |m| { + allocator.free(model); + model = m; + }, + .abort => return error.ModelNotInstalled, } - } + }; if (resolved) |r| { if (r.source == .picked) { @@ -645,8 +659,7 @@ fn handleModel(self: *Agent, _: std.mem.Allocator, rest: []const u8) void { return; } const ids = completionModels(self, self.allocator); - // Empty means the fetch failed (or a local provider with unlisted models); - // skip the check rather than block a model we just can't confirm. + // Empty list = fetch failed or unlisted local models; can't confirm, so allow. if (ids.len != 0 and !containsString(ids, trimmed)) { self.terminal.printError("unknown model: {s} (Tab to list)", .{trimmed}); return; @@ -1605,7 +1618,7 @@ pub fn listModels(allocator: std.mem.Allocator, opts: Config.Agent) !void { }); return error.ConflictingFlags; } - const resolved = (try settings.resolveCredentials(opts, null, false)) orelse return error.MissingProvider; + const resolved = (try settings.resolveCredentials(allocator, opts, null, false)) orelse return error.MissingProvider; const llm = resolved.credentials; var arena: std.heap.ArenaAllocator = .init(allocator); diff --git a/src/agent/settings.zig b/src/agent/settings.zig index ddd914dd..39dabc1b 100644 --- a/src/agent/settings.zig +++ b/src/agent/settings.zig @@ -39,18 +39,30 @@ pub const ResolvedProvider = struct { source: enum { flag, remembered, detected, picked }, }; +/// Ollama needs no API key, so it's excluded from env detection +/// (`default_candidates`) and only probed here as a last resort. +fn detectOllama(allocator: std.mem.Allocator, base_url: ?[:0]const u8) ?Credentials { + const key = zenai.provider.envApiKey(.ollama) orelse return null; + var arena: std.heap.ArenaAllocator = .init(allocator); + defer arena.deinit(); + const ids = zenai.provider.listChatModelIds(allocator, arena.allocator(), .ollama, key, base_url) catch return null; + if (ids.len == 0) return null; + return .{ .provider = .ollama, .key = key }; +} + /// Returns true when resolveCredentials would succeed (no error, non-null). /// Used by callers that need to print a banner before calling resolveCredentials. -pub fn wouldResolve(opts: Config.Agent, remembered: ?Remembered) bool { +pub fn wouldResolve(allocator: std.mem.Allocator, opts: Config.Agent, remembered: ?Remembered) bool { if (opts.provider) |p| return zenai.provider.envApiKey(p) != null; if (remembered) |r| if (zenai.provider.envApiKey(r.provider)) |_| return true; var buf: [zenai.provider.default_candidates.len]Credentials = undefined; - return zenai.provider.detectKeys(&buf, zenai.provider.default_candidates).len > 0; + if (zenai.provider.detectKeys(&buf, zenai.provider.default_candidates).len > 0) return true; + return detectOllama(allocator, opts.base_url) != null; } /// Precedence: `--provider` > remembered (if its key is still set) > first /// detected. Null means no key at all (the reason is already printed). -pub fn resolveCredentials(opts: Config.Agent, remembered: ?Remembered, allow_pick: bool) !?ResolvedProvider { +pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, remembered: ?Remembered, allow_pick: bool) !?ResolvedProvider { if (opts.provider) |p| { const key = zenai.provider.envApiKey(p) orelse { std.debug.print( @@ -69,9 +81,12 @@ pub fn resolveCredentials(opts: Config.Agent, remembered: ?Remembered, allow_pic var buf: [zenai.provider.default_candidates.len]Credentials = undefined; const found = zenai.provider.detectKeys(&buf, zenai.provider.default_candidates); if (found.len == 0) { + if (detectOllama(allocator, opts.base_url)) |creds| { + return .{ .credentials = creds, .source = .detected }; + } std.debug.print( - \\No API key detected. Set {s}. - \\If you want to use the REPL in basic mode (without LLM integration) you can pass the --no-llm option. + \\No API key detected. Set {s}, or run a local Ollama server with a pulled model. + \\To use the basic REPL (without LLM integration), pass the --no-llm option. \\ , .{api_keys_hint}); return null; diff --git a/src/help.zon b/src/help.zon index 4289b174..0fd0cdf0 100644 --- a/src/help.zon +++ b/src/help.zon @@ -131,7 +131,7 @@ \\Examples: \\ {0s} agent (auto-detects API key from env) \\ {0s} agent --provider anthropic --model claude-sonnet-4-6 - \\ {0s} agent --provider ollama --model qwen3.5:9b + \\ {0s} agent --provider ollama --model qwen3.5:latest \\ {0s} agent --no-llm (basic slash-command-only REPL) \\ {0s} agent script.js (run a saved script, then exit) \\