mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
fix(vllm): fail Score cleanly when the engine returns no prompt_logprobs
Audit of the Score path against vllm-metal (MLX on macOS): the engine accepts SamplingParams(prompt_logprobs=1) but returns an all-None prompt_logprobs list rather than computing it, so scoring is not supported there. The old guard treated the truthy [None] list as valid and silently scored every candidate as 0. Detect the all-None case and return UNIMPLEMENTED instead. No-op on Linux/CUDA, which populate real entries. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Assisted-by: Claude:opus-4.8 [Claude Code]
This commit is contained in:
@@ -457,9 +457,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if last_output is None or not getattr(last_output, "prompt_logprobs", None):
|
||||
context.set_code(grpc.StatusCode.INTERNAL)
|
||||
context.set_details("vLLM did not return prompt_logprobs")
|
||||
_pl = getattr(last_output, "prompt_logprobs", None) if last_output is not None else None
|
||||
# Some engines accept the prompt_logprobs request but return a
|
||||
# list of all-None entries instead of computing them (observed
|
||||
# with vllm-metal's MLX backend on macOS). Treat that as
|
||||
# unsupported rather than silently scoring every candidate as 0.
|
||||
if not _pl or all(e is None for e in _pl):
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details("This backend did not return prompt_logprobs; scoring is unsupported on this engine (e.g. vllm-metal / MLX on macOS).")
|
||||
return backend_pb2.ScoreResponse()
|
||||
|
||||
prompt_logprobs = last_output.prompt_logprobs
|
||||
|
||||
Reference in New Issue
Block a user