mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-15 12:18:58 -04:00
fix(mlx): strip file:// LocalPrefix before loading filesystem-imported models
MLX backends passed request.Model verbatim to mlx_lm/mlx_vlm load(). For a model imported from the filesystem, LocalAI hands the backend a file:// URI (its LocalPrefix), which load() rejects: the URI string is neither a valid HF repo id nor an existing path (Path(model).exists() returns False because of the file:// prefix), producing "Repo id must be in the form 'repo_name' or 'namespace/repo_name' ... Use repo_type argument if needed". Add a pure, unit-testable resolve_model_path(model, model_file) helper in the shared python_utils: it prefers the resolved ModelFile, strips a file:// scheme and percent-decodes the path, and leaves plain repo ids and local paths untouched. Wire it into the mlx, mlx-vlm and mlx-distributed backends (load, model_key, and the distributed broadcast all use the normalized path). Fixes #7461. Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -17,7 +17,7 @@ import grpc
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'common'))
|
||||
from grpc_auth import get_auth_interceptors
|
||||
from python_utils import messages_to_dicts, parse_options
|
||||
from python_utils import messages_to_dicts, parse_options, resolve_model_path
|
||||
from mlx_utils import parse_tool_calls, split_reasoning
|
||||
|
||||
from mlx_lm import load, stream_generate
|
||||
@@ -63,7 +63,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
backend_pb2.Result: The load model result.
|
||||
"""
|
||||
try:
|
||||
print(f"Loading MLX model: {request.Model}", file=sys.stderr)
|
||||
# Normalize the model reference: strip LocalAI's file:// LocalPrefix
|
||||
# and prefer the resolved ModelFile so mlx_lm.load() gets a plain
|
||||
# repo id or filesystem path (it rejects file:// URIs).
|
||||
model_path = resolve_model_path(request.Model, request.ModelFile)
|
||||
print(f"Loading MLX model: {model_path}", file=sys.stderr)
|
||||
print(f"Request: {request}", file=sys.stderr)
|
||||
|
||||
# Parse Options[] key:value strings into a typed dict (shared helper)
|
||||
@@ -89,9 +93,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# Load model and tokenizer using MLX
|
||||
if tokenizer_config:
|
||||
print(f"Loading with tokenizer_config: {tokenizer_config}", file=sys.stderr)
|
||||
self.model, self.tokenizer = load(request.Model, tokenizer_config=tokenizer_config)
|
||||
self.model, self.tokenizer = load(model_path, tokenizer_config=tokenizer_config)
|
||||
else:
|
||||
self.model, self.tokenizer = load(request.Model)
|
||||
self.model, self.tokenizer = load(model_path)
|
||||
|
||||
# mlx_lm.load() returns a TokenizerWrapper that detects tool
|
||||
# calling and thinking markers from the chat template / vocab.
|
||||
@@ -111,7 +115,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# Initialize thread-safe LRU prompt cache for efficient generation
|
||||
max_cache_entries = self.options.get("max_cache_entries", 10)
|
||||
self.max_kv_size = self.options.get("max_kv_size", None)
|
||||
self.model_key = request.Model
|
||||
self.model_key = model_path
|
||||
self.lru_cache = ThreadSafeLRUPromptCache(
|
||||
max_size=max_cache_entries,
|
||||
can_trim_fn=can_trim_prompt_cache,
|
||||
|
||||
@@ -12,7 +12,7 @@ import backend_pb2_grpc
|
||||
# Make the shared helpers importable so we can unit-test them without a
|
||||
# running gRPC server.
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
|
||||
from python_utils import messages_to_dicts, parse_options
|
||||
from python_utils import messages_to_dicts, parse_options, resolve_model_path
|
||||
from mlx_utils import parse_tool_calls, split_reasoning
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
@@ -322,6 +322,42 @@ class TestSharedHelpers(unittest.TestCase):
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "just text")
|
||||
|
||||
def test_resolve_model_path_file_uri(self):
|
||||
# file:// LocalPrefix (LocalAI import) is stripped to a plain path.
|
||||
self.assertEqual(resolve_model_path("file:///a/b"), "/a/b")
|
||||
|
||||
def test_resolve_model_path_file_uri_percent_decoded(self):
|
||||
# Percent-encoded characters (e.g. spaces) are decoded.
|
||||
self.assertEqual(
|
||||
resolve_model_path("file:///Users/me/My%20Models/Qwen3"),
|
||||
"/Users/me/My Models/Qwen3",
|
||||
)
|
||||
|
||||
def test_resolve_model_path_hf_repo_id_unchanged(self):
|
||||
# Plain HuggingFace repo ids must pass through untouched.
|
||||
self.assertEqual(
|
||||
resolve_model_path("mlx-community/Qwen3-Coder-30B"),
|
||||
"mlx-community/Qwen3-Coder-30B",
|
||||
)
|
||||
|
||||
def test_resolve_model_path_local_path_unchanged(self):
|
||||
# An already-local absolute path is left as-is.
|
||||
self.assertEqual(resolve_model_path("/models/Qwen3"), "/models/Qwen3")
|
||||
|
||||
def test_resolve_model_path_prefers_model_file(self):
|
||||
# The resolved ModelFile wins over Model when both are set.
|
||||
self.assertEqual(
|
||||
resolve_model_path("file:///ignored", "/resolved/local/path"),
|
||||
"/resolved/local/path",
|
||||
)
|
||||
|
||||
def test_resolve_model_path_model_file_file_uri(self):
|
||||
# A ModelFile that is itself a file:// URI is also normalized.
|
||||
self.assertEqual(
|
||||
resolve_model_path("ignored", "file:///a/b"),
|
||||
"/a/b",
|
||||
)
|
||||
|
||||
def test_parse_tool_calls_with_shim(self):
|
||||
tm = types.SimpleNamespace(
|
||||
tool_call_start="<tool_call>",
|
||||
|
||||
Reference in New Issue
Block a user