feat(loader): refactor single active backend support to LRU

This changeset introduces LRU management of loaded backends. Users can now set a maximum number of models to be loaded concurrently; when LocalAI runs in single active backend mode, the LRU capacity is set to 1 for backward compatibility.

* chore: add tests
* Update docs
* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
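For intuition, here is a minimal, self-contained sketch of the LRU policy described above. The lruLoader type is hypothetical and exists only for illustration; the real ModelLoader in pkg/model additionally manages gRPC backend processes, shutdown, and locking.

package main

import (
	"container/list"
	"fmt"
)

// lruLoader keeps at most capacity backends "loaded"; loading one more
// evicts the least recently used. A capacity of 1 reproduces the old
// single-active-backend behavior.
type lruLoader struct {
	capacity int
	order    *list.List               // front = most recently used
	entries  map[string]*list.Element // model name -> node in order
}

func newLRULoader(capacity int) *lruLoader {
	return &lruLoader{
		capacity: capacity,
		order:    list.New(),
		entries:  map[string]*list.Element{},
	}
}

func (l *lruLoader) Load(name string) {
	if el, ok := l.entries[name]; ok {
		// Cache hit: only refresh recency.
		l.order.MoveToFront(el)
		return
	}
	if l.order.Len() >= l.capacity {
		oldest := l.order.Back()
		evicted := l.order.Remove(oldest).(string)
		delete(l.entries, evicted)
		fmt.Println("evicting backend:", evicted) // stand-in for stopping the process
	}
	l.entries[name] = l.order.PushFront(name)
	fmt.Println("loading backend:", name)
}

func main() {
	l := newLRULoader(1) // single active backend mode
	l.Load("model-a")
	l.Load("model-b") // evicts model-a
}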
39 lines
936 B
Go
package backend

import (
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/grpc"
	"github.com/mudler/LocalAI/pkg/model"
)

// ModelTokenize loads (or reuses) the backend selected by modelConfig and
// asks it to tokenize the string s, returning the resulting token IDs.
func ModelTokenize(s string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
	var inferenceModel grpc.Backend

	opts := ModelOptions(modelConfig, appConfig)
	inferenceModel, err := loader.Load(opts...)
	if err != nil {
		return schema.TokenizeResponse{}, err
	}

	predictOptions := gRPCPredictOpts(modelConfig, loader.ModelPath)
	predictOptions.Prompt = s

	// tokenize the string
	resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
	if err != nil {
		return schema.TokenizeResponse{}, err
	}

	// Normalize a nil token slice to an empty one so the response always
	// carries a concrete (possibly empty) list of tokens.
	if resp.Tokens == nil {
		resp.Tokens = make([]int32, 0)
	}

	return schema.TokenizeResponse{
		Tokens: resp.Tokens,
	}, nil
}
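The nil-slice normalization at the end of ModelTokenize matters once the response is serialized: in Go's encoding/json, a nil slice marshals to null, while an empty slice marshals to []. A standalone sketch of that difference follows; the tokenizeResponse type is a local stand-in for illustration, not the real schema.TokenizeResponse.

package main

import (
	"encoding/json"
	"fmt"
)

// tokenizeResponse mirrors the shape of a response carrying token IDs.
type tokenizeResponse struct {
	Tokens []int32 `json:"tokens"`
}

func main() {
	var nilTokens []int32
	a, _ := json.Marshal(tokenizeResponse{Tokens: nilTokens})
	fmt.Println(string(a)) // {"tokens":null}

	b, _ := json.Marshal(tokenizeResponse{Tokens: make([]int32, 0)})
	fmt.Println(string(b)) // {"tokens":[]}
}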