From 5e3774dfe313bebdb910f9e6c7954caf773e8191 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 24 Jun 2026 21:31:41 +0000
Subject: [PATCH] fix(vllm): fail Score cleanly when the engine returns no prompt_logprobs

Audit of the Score path against vllm-metal (MLX on macOS): the engine
accepts SamplingParams(prompt_logprobs=1) but returns an all-None
prompt_logprobs list rather than computing it, so scoring is not
supported there. The old guard treated the truthy [None] list as valid
and silently scored every candidate as 0. Detect the all-None case and
return UNIMPLEMENTED instead. No-op on Linux/CUDA, which populate real
entries.

Signed-off-by: Ettore Di Giacinto
Assisted-by: Claude:opus-4.8 [Claude Code]
---
 backend/python/vllm/backend.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/backend/python/vllm/backend.py b/backend/python/vllm/backend.py
index a38849137..1e93f26e2 100644
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -457,9 +457,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         except Exception:
             pass
 
-        if last_output is None or not getattr(last_output, "prompt_logprobs", None):
-            context.set_code(grpc.StatusCode.INTERNAL)
-            context.set_details("vLLM did not return prompt_logprobs")
+        _pl = getattr(last_output, "prompt_logprobs", None) if last_output is not None else None
+        # Some engines accept the prompt_logprobs request but return a
+        # list of all-None entries instead of computing them (observed
+        # with vllm-metal's MLX backend on macOS). Treat that as
+        # unsupported rather than silently scoring every candidate as 0.
+        if not _pl or all(e is None for e in _pl):
+            context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+            context.set_details("This backend did not return prompt_logprobs; scoring is unsupported on this engine (e.g. vllm-metal / MLX on macOS).")
             return backend_pb2.ScoreResponse()
 
         prompt_logprobs = last_output.prompt_logprobs