Mirror of https://github.com/mudler/LocalAI.git (synced 2026-03-30 20:52:11 -04:00)
fix: reranking models limited to 512 tokens in llama.cpp backend (#6344)
Fix reranking models being limited to 512 input tokens in the llama.cpp backend.

Signed-off-by: JonGames <18472148+jongames@users.noreply.github.com>
@@ -231,6 +231,7 @@ static void params_parse(const backend::ModelOptions* request,
     params.cpuparams.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
+    params.n_ubatch = request->nbatch(); // fixes reranking models being limited to 512 tokens (the default n_ubatch size); allows setting the maximum number of input tokens, avoiding the error "input is too large to process. increase the physical batch size"
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
     //params.n_parallel = 1;
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");