diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 3f33c74bf..1009d36fd 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -358,9 +358,7 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
     params.model.path = request->modelfile();

     if (!request->mmproj().empty()) {
-        // get the directory of modelfile
-        std::string model_dir = params.model.path.substr(0, params.model.path.find_last_of("/\\"));
-        params.mmproj.path = model_dir + "/"+ request->mmproj();
+        params.mmproj.path = request->mmproj();
     }
     // params.model_alias ??
     params.model_alias = request->modelfile();
diff --git a/core/backend/options.go b/core/backend/options.go
index b585a22b3..f3d5a4ccd 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -36,7 +36,7 @@ func ModelOptions(c config.ModelConfig, so *config.ApplicationConfig, opts ...mo
	c.Threads = &threads

-	grpcOpts := grpcModelOpts(c)
+	grpcOpts := grpcModelOpts(c, so.SystemState.Model.ModelsPath)
	defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))

	if so.ParallelBackendRequests {
@@ -72,7 +72,7 @@ func getSeed(c config.ModelConfig) int32 {
	return seed
}

-func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
+func grpcModelOpts(c config.ModelConfig, modelPath string) *pb.ModelOptions {
	b := 512
	if c.Batch != 0 {
		b = c.Batch
@@ -131,7 +131,7 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
		})
	}

-	return &pb.ModelOptions{
+	opts := &pb.ModelOptions{
		CUDA:            c.CUDA || c.Diffusers.CUDA,
		SchedulerType:   c.Diffusers.SchedulerType,
		GrammarTriggers: triggers,
@@ -170,7 +170,6 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
		LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
		LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
		LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
-		MMProj:              c.MMProj,
		FlashAttention:      flashAttention,
		CacheTypeKey:        c.CacheTypeK,
		CacheTypeValue:      c.CacheTypeV,
@@ -198,6 +197,12 @@ func grpcModelOpts(c config.ModelConfig) *pb.ModelOptions {
		// RWKV
		Tokenizer: c.Tokenizer,
	}
+
+	if c.MMProj != "" {
+		opts.MMProj = filepath.Join(modelPath, c.MMProj)
+	}
+
+	return opts
}

func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions {