mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-03-03 14:46:00 -05:00
fix(generation): unnecessary casting [ec2 build] [wheel build]
This breaks when compiled to cfunc. Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -11,7 +11,7 @@ class StopSequenceCriteria(transformers.StoppingCriteria):
|
||||
self.stop_sequences, self.tokenizer = stop_sequences, tokenizer
|
||||
def __call__(self, input_ids: torch.Tensor, scores: t.Any, **_: t.Any) -> bool:
    """Return True when the decoded text of the first sequence ends with any configured stop sequence.

    Decodes ``input_ids[0]`` with ``self.tokenizer`` and checks each entry of
    ``self.stop_sequences`` as a suffix. ``scores`` is accepted for interface
    compatibility but unused.
    """
    decoded_text = self.tokenizer.decode(input_ids.tolist()[0])
    for candidate in self.stop_sequences:
        if decoded_text.endswith(candidate):
            return True
    return False
|
||||
class StopOnTokens(transformers.StoppingCriteria):
|
||||
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **_: t.Any) -> bool:
    """Return True when the most recently generated token id is a stop token.

    ``scores`` is accepted for interface compatibility with
    ``transformers.StoppingCriteria`` but unused.
    """
    # NOTE: the t.cast(int, ...) wrapper was removed — it is a runtime no-op
    # and broke compilation to cfunc (see commit message). Behavior unchanged.
    return input_ids[0][-1] in {50278, 50279, 50277, 1, 0}
|
||||
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **_: t.Any) -> bool:
    """Return True when the last token of the first sequence is a known stop-token id.

    ``scores`` is accepted for interface compatibility but unused.
    """
    stop_token_ids = {50278, 50279, 50277, 1, 0}
    last_token = input_ids[0][-1]
    return last_token in stop_token_ids
|
||||
def prepare_logits_processor(config: openllm.LLMConfig) -> transformers.LogitsProcessorList:
|
||||
generation_config = config.generation_config
|
||||
logits_processor = transformers.LogitsProcessorList()
|
||||
|
||||
Reference in New Issue
Block a user