mirror of
https://github.com/mudler/LocalAI.git
synced 2026-03-30 20:52:11 -04:00
fix(llama-cpp): correctly calculate embeddings (#6259)
* chore(tests): check embeddings differs in llama.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(llama.cpp): use the correct field for embedding Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(llama.cpp): use embedding type none Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(tests): add test-cases in aio-e2e suite Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
55766d269b
commit
6410c99bf2
@@ -701,7 +701,7 @@ public:
|
||||
*/
|
||||
|
||||
// for the shape of input/content, see tokenize_input_prompts()
|
||||
json prompt = body.at("prompt");
|
||||
json prompt = body.at("embeddings");
|
||||
|
||||
|
||||
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
|
||||
@@ -712,6 +712,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
int embd_normalize = 2; // default to Euclidean/L2 norm
|
||||
// create and queue the task
|
||||
json responses = json::array();
|
||||
bool error = false;
|
||||
@@ -725,9 +726,8 @@ public:
|
||||
task.index = i;
|
||||
task.prompt_tokens = std::move(tokenized_prompts[i]);
|
||||
|
||||
// OAI-compat
|
||||
task.params.oaicompat = OAICOMPAT_TYPE_EMBEDDING;
|
||||
|
||||
task.params.oaicompat = OAICOMPAT_TYPE_NONE;
|
||||
task.params.embd_normalize = embd_normalize;
|
||||
tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
@@ -743,9 +743,8 @@ public:
|
||||
responses.push_back(res->to_json());
|
||||
}
|
||||
}, [&](const json & error_data) {
|
||||
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, error_data.value("content", ""));
|
||||
error = true;
|
||||
}, [&]() {
|
||||
// NOTE: we should try to check when the writer is closed here
|
||||
return false;
|
||||
});
|
||||
|
||||
@@ -755,12 +754,36 @@ public:
|
||||
return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results");
|
||||
}
|
||||
|
||||
std::vector<float> embeddings = responses[0].value("embedding", std::vector<float>());
|
||||
// loop the vector and set the embeddings results
|
||||
for (int i = 0; i < embeddings.size(); i++) {
|
||||
embeddingResult->add_embeddings(embeddings[i]);
|
||||
std::cout << "[DEBUG] Responses size: " << responses.size() << std::endl;
|
||||
|
||||
// Process the responses and extract embeddings
|
||||
for (const auto & response_elem : responses) {
|
||||
// Check if the response has an "embedding" field
|
||||
if (response_elem.contains("embedding")) {
|
||||
json embedding_data = json_value(response_elem, "embedding", json::array());
|
||||
|
||||
if (embedding_data.is_array() && !embedding_data.empty()) {
|
||||
for (const auto & embedding_vector : embedding_data) {
|
||||
if (embedding_vector.is_array()) {
|
||||
for (const auto & embedding_value : embedding_vector) {
|
||||
embeddingResult->add_embeddings(embedding_value.get<float>());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if the response itself contains the embedding data directly
|
||||
if (response_elem.is_array()) {
|
||||
for (const auto & embedding_value : response_elem) {
|
||||
embeddingResult->add_embeddings(embedding_value.get<float>());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user