From 5e3774dfe313bebdb910f9e6c7954caf773e8191 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 24 Jun 2026 21:31:41 +0000
Subject: [PATCH] fix(vllm): fail Score cleanly when the engine returns no
 prompt_logprobs

Audit of the Score path against vllm-metal (MLX on macOS): the engine accepts
SamplingParams(prompt_logprobs=1) but returns an all-None prompt_logprobs list
rather than computing it, so scoring is not supported there. The old guard
treated the truthy [None] list as valid and silently scored every candidate as
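0. Detect the all-None case and return UNIMPLEMENTED instead. Normal scoring
on Linux/CUDA, which populate real entries, is unaffected; note that the
pre-existing missing/empty prompt_logprobs failure now also reports
UNIMPLEMENTED rather than INTERNAL.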

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Assisted-by: Claude:opus-4.8 [Claude Code]
---
 backend/python/vllm/backend.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index a38849137..1e93f26e2 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -457,9 +457,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                     except Exception:
                         pass
 
-                if last_output is None or not getattr(last_output, "prompt_logprobs", None):
-                    context.set_code(grpc.StatusCode.INTERNAL)
-                    context.set_details("vLLM did not return prompt_logprobs")
+                _pl = getattr(last_output, "prompt_logprobs", None) if last_output is not None else None
+                # Some engines accept the prompt_logprobs request but return a
+                # list of all-None entries instead of computing them (observed
+                # with vllm-metal's MLX backend on macOS). Treat that as
+                # unsupported rather than silently scoring every candidate as 0.
+                if not _pl or all(e is None for e in _pl):
+                    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+                    context.set_details("This backend did not return prompt_logprobs; scoring is unsupported on this engine (e.g. vllm-metal / MLX on macOS).")
                     return backend_pb2.ScoreResponse()
 
                 prompt_logprobs = last_output.prompt_logprobs