agent: auto-detect Ollama and fallback models

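Probes a local Ollama server as a fallback provider when no API keys are set.
Swaps to an installed Ollama model if the default model is not installed; an
explicitly requested model that is not installed only warns (and aborts a one-shot run).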
This commit is contained in:
Adrià Arrufat
2026-06-04 18:29:29 +02:00
parent f2a7367e6d
commit ab540e9bdf
3 changed files with 65 additions and 37 deletions

View File

@@ -139,33 +139,41 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re
return "";
}
/// Ollama's local catalog is authoritative and cheap to query, unlike the
/// cloud providers whose `/models` listings can lag actual availability — so
/// only for Ollama do we confirm the model is installed up front instead of
/// letting the first request fail mid-turn (its default model may not be
/// pulled either). Returns false when a one-shot run should abort; a REPL
/// session only warns, since `/model` can still fix it interactively.
fn verifyOllamaModelInstalled(
/// Result of reconcileOllamaModel: either the model name to run with, or a
/// request to stop before starting.
const OllamaModel = union(enum) {
/// Model name to use. Owned by the allocator passed to reconcileOllamaModel.
use: []u8,
/// Returned for a one-shot run whose explicitly requested model is not
/// among the ids the Ollama server lists.
abort,
};
/// Reconciles the requested model with the ids the Ollama server lists.
///
/// Only Ollama: its local catalog is authoritative and cheap, unlike cloud
/// `/models` listings that can lag real availability.
///
/// Outcomes:
/// - `desired` is listed, or the listing fails / is empty: `.use` with a copy
///   of `desired` (unverified in the latter case).
/// - `desired` is not listed and `explicit` is false: prints a notice and
///   returns `.use` with a copy of the first listed id.
/// - `desired` is not listed and `explicit` is true: prints the listed ids and
///   an `ollama pull` hint, then returns `.abort` when `is_one_shot`, else
///   `.use` with a copy of `desired`.
///
/// The `.use` payload is duplicated with `allocator`; the caller frees it.
/// The only errors propagated are those of `allocator.dupe`.
fn reconcileOllamaModel(
allocator: std.mem.Allocator,
llm: Credentials,
model: []const u8,
desired: []const u8,
base_url: ?[:0]const u8,
explicit: bool,
is_one_shot: bool,
) bool {
if (llm.provider != .ollama or model.len == 0) return true;
) !OllamaModel {
var arena: std.heap.ArenaAllocator = .init(allocator);
defer arena.deinit();
// Server unreachable or empty catalog: don't block on what we can't confirm.
const ids = zenai.provider.listChatModelIds(allocator, arena.allocator(), .ollama, llm.key, base_url) catch return true;
if (ids.len == 0 or containsString(ids, model)) return true;
const installed = std.mem.join(arena.allocator(), ", ", ids) catch return true;
std.debug.print(
"Model '{s}' is not installed in Ollama.\nInstalled: {s}\nRun `ollama pull {s}` to install it, or choose one of the above.\n",
.{ model, installed, model },
);
return !is_one_shot;
// Unreachable server → empty list → fall through and use `desired` unchecked.
// NOTE(review): `catch &.{}` discards every error from the listing call, not
// only connection failures.
const ids: []const []const u8 = zenai.provider.listChatModelIds(allocator, arena.allocator(), .ollama, llm.key, base_url) catch &.{};
if (ids.len != 0 and !containsString(ids, desired)) {
if (!explicit) {
std.debug.print("Default Ollama model '{s}' is not installed; using '{s}'.\n", .{ desired, ids[0] });
// Copy the id out before returning: `ids` is presumably backed by the
// arena released by the `defer` above — TODO confirm against listChatModelIds.
return .{ .use = try allocator.dupe(u8, ids[0]) };
}
// A failed join only degrades the message to an empty "Installed:" list.
const installed = std.mem.join(arena.allocator(), ", ", ids) catch "";
std.debug.print(
"Model '{s}' is not installed in Ollama.\nInstalled: {s}\nRun `ollama pull {s}` to install it, or choose one of the above.\n",
.{ desired, installed, desired },
);
// When not one-shot, fall through and keep `desired` despite the warning.
if (is_one_shot) return .abort;
}
return .{ .use = try allocator.dupe(u8, desired) };
}
pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent {
@@ -213,11 +221,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
// interactive "Select a provider" prompt. On error paths (missing key /
// no key detected) resolveCredentials prints its own message and the
// banner is skipped.
if (will_repl and (!resolve or settings.wouldResolve(opts, remembered))) {
if (will_repl and (!resolve or settings.wouldResolve(allocator, opts, remembered))) {
std.debug.print(Terminal.ansi.bold ++ "\n Lightpanda Agent" ++ Terminal.ansi.reset ++ " " ++ Terminal.ansi.dim ++ "({s})" ++ Terminal.ansi.reset ++ "\n", .{lp.build_config.version});
}
const resolved: ?settings.ResolvedProvider = if (resolve) try settings.resolveCredentials(opts, remembered, will_repl) else null;
const resolved: ?settings.ResolvedProvider = if (resolve) try settings.resolveCredentials(allocator, opts, remembered, will_repl) else null;
const llm: ?Credentials = if (resolved) |r| r.credentials else null;
if (llm == null and requires_llm) {
@@ -227,14 +235,20 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
return error.MissingProvider;
}
const model = try allocator.dupe(u8, resolveModelName(opts, resolved, remembered));
var model = try allocator.dupe(u8, resolveModelName(opts, resolved, remembered));
errdefer allocator.free(model);
if (llm) |l| {
if (!verifyOllamaModelInstalled(allocator, l, model, opts.base_url, is_one_shot)) {
return error.ModelNotInstalled;
if (llm) |l| if (l.provider == .ollama) {
const explicit = opts.model != null or
(remembered != null and remembered.?.provider == .ollama);
switch (try reconcileOllamaModel(allocator, l, model, opts.base_url, explicit, is_one_shot)) {
.use => |m| {
allocator.free(model);
model = m;
},
.abort => return error.ModelNotInstalled,
}
}
};
if (resolved) |r| {
if (r.source == .picked) {
@@ -645,8 +659,7 @@ fn handleModel(self: *Agent, _: std.mem.Allocator, rest: []const u8) void {
return;
}
const ids = completionModels(self, self.allocator);
// Empty means the fetch failed (or a local provider with unlisted models);
// skip the check rather than block a model we just can't confirm.
// Empty list = fetch failed or unlisted local models; can't confirm, so allow.
if (ids.len != 0 and !containsString(ids, trimmed)) {
self.terminal.printError("unknown model: {s} (Tab to list)", .{trimmed});
return;
@@ -1605,7 +1618,7 @@ pub fn listModels(allocator: std.mem.Allocator, opts: Config.Agent) !void {
});
return error.ConflictingFlags;
}
const resolved = (try settings.resolveCredentials(opts, null, false)) orelse return error.MissingProvider;
const resolved = (try settings.resolveCredentials(allocator, opts, null, false)) orelse return error.MissingProvider;
const llm = resolved.credentials;
var arena: std.heap.ArenaAllocator = .init(allocator);

View File

@@ -39,18 +39,30 @@ pub const ResolvedProvider = struct {
source: enum { flag, remembered, detected, picked },
};
/// Last-resort provider probe. Ollama needs no API key, so it is left out of
/// env detection (`default_candidates`) and only tried here. Yields Ollama
/// credentials when the chat-model listing succeeds and is non-empty, and
/// null in every other case.
fn detectOllama(allocator: std.mem.Allocator, base_url: ?[:0]const u8) ?Credentials {
    const ollama_key = zenai.provider.envApiKey(.ollama) orelse return null;

    // The listing is only inspected for its length; release it on return.
    var scratch: std.heap.ArenaAllocator = .init(allocator);
    defer scratch.deinit();

    const listed = zenai.provider.listChatModelIds(
        allocator,
        scratch.allocator(),
        .ollama,
        ollama_key,
        base_url,
    ) catch return null;

    if (listed.len > 0) {
        return Credentials{ .provider = .ollama, .key = ollama_key };
    }
    return null;
}
/// Returns true when resolveCredentials would succeed (no error, non-null).
/// Used by callers that need to print a banner before calling resolveCredentials.
///
/// Checks, in order: the key for `opts.provider`, the key for the remembered
/// provider, any key found by `detectKeys`, and finally `detectOllama`.
/// NOTE(review): the `detectOllama` fallback lists the server's models, so this
/// is presumably no longer a pure environment lookup — confirm the extra
/// round-trip before the banner is acceptable.
pub fn wouldResolve(opts: Config.Agent, remembered: ?Remembered) bool {
pub fn wouldResolve(allocator: std.mem.Allocator, opts: Config.Agent, remembered: ?Remembered) bool {
// An explicit provider is decisive: no fallback to other providers.
if (opts.provider) |p| return zenai.provider.envApiKey(p) != null;
if (remembered) |r| if (zenai.provider.envApiKey(r.provider)) |_| return true;
var buf: [zenai.provider.default_candidates.len]Credentials = undefined;
return zenai.provider.detectKeys(&buf, zenai.provider.default_candidates).len > 0;
if (zenai.provider.detectKeys(&buf, zenai.provider.default_candidates).len > 0) return true;
// Last resort when no API key was detected.
return detectOllama(allocator, opts.base_url) != null;
}
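/// Precedence: `--provider` > remembered (if its key is still set) > first
/// detected key > an Ollama server that lists at least one model (probed only
/// when no key is detected). Null means no usable provider was found (the
/// reason is already printed).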
pub fn resolveCredentials(opts: Config.Agent, remembered: ?Remembered, allow_pick: bool) !?ResolvedProvider {
pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, remembered: ?Remembered, allow_pick: bool) !?ResolvedProvider {
if (opts.provider) |p| {
const key = zenai.provider.envApiKey(p) orelse {
std.debug.print(
@@ -69,9 +81,12 @@ pub fn resolveCredentials(opts: Config.Agent, remembered: ?Remembered, allow_pic
var buf: [zenai.provider.default_candidates.len]Credentials = undefined;
const found = zenai.provider.detectKeys(&buf, zenai.provider.default_candidates);
if (found.len == 0) {
if (detectOllama(allocator, opts.base_url)) |creds| {
return .{ .credentials = creds, .source = .detected };
}
std.debug.print(
\\No API key detected. Set {s}.
\\If you want to use the REPL in basic mode (without LLM integration) you can pass the --no-llm option.
\\No API key detected. Set {s}, or run a local Ollama server with a pulled model.
\\To use the basic REPL (without LLM integration), pass the --no-llm option.
\\
, .{api_keys_hint});
return null;

View File

@@ -131,7 +131,7 @@
\\Examples:
\\ {0s} agent (auto-detects API key from env)
\\ {0s} agent --provider anthropic --model claude-sonnet-4-6
\\ {0s} agent --provider ollama --model qwen3.5:9b
\\ {0s} agent --provider ollama --model qwen3.5:latest
\\ {0s} agent --no-llm (basic slash-command-only REPL)
\\ {0s} agent script.js (run a saved script, then exit)
\\