Compare commits


20 Commits

Author SHA1 Message Date
Ettore Di Giacinto
bcf02449b3 ci(dockerhub): push images also to dockerhub (#1542)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-04 08:32:29 +01:00
LocalAI [bot]
d48faf35ab ⬆️ Update ggerganov/llama.cpp (#1544)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-04 00:08:03 +01:00
Ettore Di Giacinto
583bd28a5c fix(diffusers): add omegaconf dependency (#1540)
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-04 00:06:41 +01:00
LocalAI [bot]
7e1d8c489b ⬆️ Update ggerganov/llama.cpp (#1533)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-03 08:43:35 +01:00
LocalAI [bot]
de28867374 ⬆️ Update ggerganov/llama.cpp (#1531)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2024-01-02 00:28:22 +00:00
Ettore Di Giacinto
a1aa6cb7c2 fix(entrypoint): cd to backend dir before start (#1530)
Certain backends, such as vall-e-x, are not meant to be used as a library, so
we want to start the process in the same folder where the backend and
all of its assets live. Fixes #1394
2024-01-01 22:02:48 +01:00
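The actual change adds `cd $DIR` to the Python backends' `run.sh` wrappers (visible in the diffs further down). As a hedged illustration of the same idea in Go, not LocalAI's actual launcher code, one could set the child process's working directory to the backend folder so relative asset paths resolve; the backend path and script name below are hypothetical.

```go
package main

import (
	"log"
	"os"
	"os/exec"
	"path/filepath"
)

// runBackend launches a backend's run.sh with the backend's own folder as the
// working directory, so relative asset paths (weights, voices, configs) resolve.
func runBackend(backendDir string) error {
	cmd := exec.Command("/bin/sh", filepath.Join(backendDir, "run.sh"))
	cmd.Dir = backendDir // same effect as the `cd $DIR` added to run.sh
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

func main() {
	// Hypothetical backend location, for illustration only.
	if err := runBackend("./backend/python/vall-e-x"); err != nil {
		log.Fatal(err)
	}
}
```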
Ettore Di Giacinto
85e2767dca feat: add trimsuffix (#1528) 2024-01-01 14:39:42 +01:00
Ettore Di Giacinto
fd48cb6506 deps(llama.cpp): update and sync grpc server (#1527)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-01-01 14:39:31 +01:00
Ettore Di Giacinto
522659eb59 feat(prepare): allow to specify additional files to download (#1526) 2024-01-01 14:39:13 +01:00
Ettore Di Giacinto
f068efe509 docs(phi-2): add example (#1525) 2024-01-01 10:51:47 +01:00
Ettore Di Giacinto
726fe416bb docs: update hot topics
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2024-01-01 10:41:39 +01:00
Ettore Di Giacinto
66fa4f1767 feat: share models by url (#1522)
* feat: allow passing models via args

* expose it also as an env/arg

* docs: enhancements to build/requirements

* do not display status always

* print download status

* not all messages are debug
2024-01-01 10:31:03 +01:00
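As a hedged, self-contained sketch of the startup flow this commit introduces (it mirrors the `api/api.go` hunk further down, but uses simplified stand-in helpers rather than LocalAI's exact API): each URL passed via the new `--models` flag or `MODELS` environment variable is hashed to a stable filename, and its YAML configuration is fetched only if it is not already present in the models path.

```go
package main

import (
	"crypto/md5"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
)

// md5Name returns a stable on-disk name for a model URL, as the startup code does.
func md5Name(url string) string {
	sum := md5.Sum([]byte(url))
	return hex.EncodeToString(sum[:])
}

// fetchModelConfig downloads the YAML config for a model URL into modelPath,
// skipping the download when the file already exists.
func fetchModelConfig(modelPath, url string) error {
	dst := filepath.Join(modelPath, md5Name(url)) + ".yaml"
	if _, err := os.Stat(dst); !errors.Is(err, os.ErrNotExist) {
		return err // already present, or a stat error other than "not found"
	}
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer out.Close()
	_, err = io.Copy(out, resp.Body)
	return err
}

func main() {
	for _, u := range os.Args[1:] { // e.g. config URLs passed on the command line
		if err := fetchModelConfig("./models", u); err != nil {
			fmt.Fprintln(os.Stderr, "error loading model:", err)
		}
	}
}
```

In the real implementation the download goes through `utils.DownloadFile` with progress reporting, as shown in the diff below.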
Ettore Di Giacinto
d6565f3b99 Update _index.en.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-31 10:58:22 +01:00
LocalAI [bot]
27686ff20b ⬆️ Update ggerganov/llama.cpp (#1518)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-31 00:19:08 +00:00
LocalAI [bot]
a8b865022f ⬆️ Update docs version mudler/LocalAI (#1517)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-30 23:50:24 +00:00
Ettore Di Giacinto
c1888a8062 feat(preload): prepare models in galleries (#1515)
Previously, when applying models from the gallery API, we didn't actually
allow remote URLs as models, because nothing was downloading the
models referenced in the configuration file. Now we call Preload after
all the models are loaded in memory.
2023-12-30 18:55:18 +01:00
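The `ConfigLoader.Preload` hunk further down downloads any `download_files` entries (verifying their SHA256) and resolves remote model URIs before fetching them. As one concrete piece of that, here is a hedged Go sketch of how a `huggingface://` URI can be turned into a direct download URL, in the spirit of the `ConvertURL` helper near the end of this diff; the names are simplified stand-ins, and defaulting the branch to `main` is an assumption taken from the surrounding code.

```go
package main

import (
	"fmt"
	"strings"
)

// convertHuggingFaceURL resolves a huggingface:// URI to a direct download URL,
// e.g. huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf ->
//      https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf
func convertHuggingFaceURL(s string) string {
	if !strings.HasPrefix(s, "huggingface://") {
		return s
	}
	repo := strings.TrimPrefix(s, "huggingface://")
	branch := "main" // assumed default when no @branch suffix is given
	if parts := strings.SplitN(repo, "@", 2); len(parts) == 2 {
		repo, branch = parts[0], parts[1]
	}
	segs := strings.SplitN(repo, "/", 3) // owner / repository / path-to-file
	if len(segs) < 3 {
		return s
	}
	return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", segs[0], segs[1], branch, segs[2])
}

func main() {
	fmt.Println(convertHuggingFaceURL("huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf"))
}
```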
Ettore Di Giacinto
a95bb0521d fix(download): correctly check for not found error (#1514) 2023-12-30 15:36:46 +01:00
Chris Natale
e2311a145c Fix: Set proper Homebrew install location for x86 Macs (#1510)
* set proper Homebrew install location for x86 Macs

* fix: remove prior conditional that my logic replaces
2023-12-30 12:37:26 +01:00
lunamidori5
d4e0bab6be Update version.json (2.3.0) (#1511)
Update version.json

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-30 10:19:46 +01:00
LocalAI [bot]
5b0dc20e4c ⬆️ Update ggerganov/llama.cpp (#1509)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-30 09:19:07 +00:00
25 changed files with 407 additions and 139 deletions

View File

@@ -27,8 +27,10 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
@@ -107,8 +109,10 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
secrets:
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
matrix:
include:

View File

@@ -46,6 +46,10 @@ on:
required: true
dockerPassword:
required: true
quayUsername:
required: true
quayPassword:
required: true
jobs:
reusable_image-build:
runs-on: ${{ inputs.runs-on }}
@@ -100,7 +104,9 @@ jobs:
id: meta
uses: docker/metadata-action@v5
with:
images: quay.io/go-skynet/local-ai
images: |
quay.io/go-skynet/local-ai
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
@@ -122,10 +128,17 @@ jobs:
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.dockerUsername }}
password: ${{ secrets.dockerPassword }}
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.quayUsername }}
password: ${{ secrets.quayPassword }}
- name: Build and push
uses: docker/build-push-action@v5
with:

View File

@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
CPPLLAMA_VERSION?=65e5f6dadbba4b496bba27f573e473c66b446496
CPPLLAMA_VERSION?=cb1e2818e0e12ec99f7236ec5d4f3ffd8bcc2f4a
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

View File

@@ -40,6 +40,7 @@
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
- Inline templates: https://github.com/mudler/LocalAI/pull/1452
- Mixtral: https://github.com/mudler/LocalAI/pull/1449

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
config "github.com/go-skynet/LocalAI/api/config"
@@ -16,6 +17,7 @@ import (
"github.com/go-skynet/LocalAI/metrics"
"github.com/go-skynet/LocalAI/pkg/assets"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
@@ -36,6 +38,26 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
modelPath := options.Loader.ModelPath
if len(options.ModelsURL) > 0 {
for _, url := range options.ModelsURL {
if utils.LooksLikeURL(url) {
// md5 of model name
md5Name := utils.MD5(url)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if err != nil {
log.Error().Msgf("error loading model: %s", err.Error())
}
}
}
}
}
cl := config.NewConfigLoader()
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
@@ -51,6 +73,18 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
log.Error().Msgf("error downloading models: %s", err.Error())
}
if options.PreloadJSONModels != "" {
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.Debug {
for _, v := range cl.ListConfigs() {
cfg, _ := cl.GetConfig(v)
@@ -67,18 +101,6 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
}
}
if options.PreloadJSONModels != "" {
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
return nil, nil, err
}
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()

View File

@@ -159,6 +159,9 @@ func Finetune(config config.Config, input, prediction string) string {
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
return prediction
for _, c := range config.TrimSuffix {
prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
}
return prediction
}

View File

@@ -1,6 +1,7 @@
package api_config
import (
"errors"
"fmt"
"io/fs"
"os"
@@ -51,6 +52,14 @@ type Config struct {
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool `yaml:"cuda"`
DownloadFiles []File `yaml:"download_files"`
}
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI string `yaml:"uri" json:"uri"`
}
type VallE struct {
@@ -102,16 +111,18 @@ type LLMConfig struct {
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
MMProj string `yaml:"mmproj"`
RopeScaling string `yaml:"rope_scaling"`
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
@@ -266,22 +277,44 @@ func (cm *ConfigLoader) ListConfigs() []string {
return res
}
// Preload prepares models that are not local, i.e. referenced by URL or Hugging Face repository
func (cm *ConfigLoader) Preload(modelPath string) error {
cm.Lock()
defer cm.Unlock()
status := func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
}
log.Info().Msgf("Preloading models from %s", modelPath)
for i, config := range cm.configs {
// Download files and verify their SHA
for _, file := range config.DownloadFiles {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
return err
}
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
return err
}
}
modelURL := config.PredictionOptions.Model
modelURL = utils.ConvertURL(modelURL)
if strings.HasPrefix(modelURL, "http://") || strings.HasPrefix(modelURL, "https://") {
if utils.LooksLikeURL(modelURL) {
// md5 of model name
md5Name := utils.MD5(modelURL)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); err == os.ErrNotExist {
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", func(fileName, current, total string, percent float64) {
log.Info().Msgf("Downloading %s: %s/%s (%.2f%%)", fileName, current, total, percent)
})
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
if err != nil {
return err
}

View File

@@ -130,6 +130,12 @@ func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
continue
}
err = cm.Preload(g.modelPath)
if err != nil {
updateError(err)
continue
}
g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
}
}

View File

@@ -40,9 +40,12 @@ type Option struct {
SingleBackend bool
ParallelBackendRequests bool
WatchDogIdle bool
WatchDogBusy bool
WatchDog bool
WatchDogIdle bool
WatchDogBusy bool
WatchDog bool
ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
}
@@ -63,6 +66,12 @@ func NewOptions(o ...AppOption) *Option {
return opt
}
func WithModelsURL(urls ...string) AppOption {
return func(o *Option) {
o.ModelsURL = urls
}
}
func WithCors(b bool) AppOption {
return func(o *Option) {
o.CORS = b

View File

@@ -17,9 +17,17 @@ cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
link_directories("/opt/homebrew/lib")
include_directories("/opt/homebrew/include")
# Set correct Homebrew install folder for Apple Silicon and Intel Macs
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
set(HOMEBREW_DEFAULT_PREFIX "/opt/homebrew")
else()
set(HOMEBREW_DEFAULT_PREFIX "/usr/local")
endif()
link_directories("${HOMEBREW_DEFAULT_PREFIX}/lib")
include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
endif()
find_package(absl CONFIG REQUIRED)

View File

@@ -26,6 +26,7 @@
#include <mutex>
#include <chrono>
#include <regex>
#include <condition_variable>
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>
@@ -40,12 +41,15 @@ using backend::HealthMessage;
///// LLAMA.CPP server code below
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
using json = nlohmann::json;
struct server_params
{
std::string hostname = "127.0.0.1";
std::string api_key;
std::string public_path = "examples/server/public";
int32_t port = 8080;
int32_t read_timeout = 600;
@@ -89,7 +93,7 @@ static inline bool is_base64(uint8_t c)
return (isalnum(c) || (c == '+') || (c == '/'));
}
static std::vector<uint8_t> base64_decode(std::string const &encoded_string)
static std::vector<uint8_t> base64_decode(const std::string & encoded_string)
{
int i = 0;
int j = 0;
@@ -216,10 +220,10 @@ struct slot_image
int32_t id;
bool request_encode_image = false;
float* image_embedding = nullptr;
float * image_embedding = nullptr;
int32_t image_tokens = 0;
clip_image_u8 img_data;
clip_image_u8 * img_data;
std::string prefix_prompt; // before of this image
};
@@ -441,15 +445,16 @@ struct llama_client_slot
generated_token_probs.clear();
for (slot_image &img : images)
for (slot_image & img : images)
{
free(img.image_embedding);
delete[] img.img_data.data;
if (img.img_data) {
clip_image_u8_free(img.img_data);
}
img.prefix_prompt = "";
}
images.clear();
// llama_set_rng_seed(ctx, params.seed); in batched the seed matter???????
}
bool has_budget(gpt_params &global_params) {
@@ -550,7 +555,9 @@ struct llama_server_context
std::vector<task_result> queue_results;
std::vector<task_multi> queue_multitasks;
std::mutex mutex_tasks; // also guards id_gen, and queue_multitasks
std::condition_variable condition_tasks;
std::mutex mutex_results;
std::condition_variable condition_results;
~llama_server_context()
{
@@ -769,6 +776,42 @@ struct llama_server_context
slot->prompt = "";
}
slot->sparams.penalty_prompt_tokens.clear();
slot->sparams.use_penalty_prompt_tokens = false;
const auto &penalty_prompt = data.find("penalty_prompt");
if (penalty_prompt != data.end())
{
if (penalty_prompt->is_string())
{
const auto penalty_prompt_string = penalty_prompt->get<std::string>();
auto penalty_tokens = llama_tokenize(model, penalty_prompt_string, false);
slot->sparams.penalty_prompt_tokens.swap(penalty_tokens);
if (slot->params.n_predict > 0)
{
slot->sparams.penalty_prompt_tokens.reserve(slot->sparams.penalty_prompt_tokens.size() + slot->params.n_predict);
}
slot->sparams.use_penalty_prompt_tokens = true;
}
else if (penalty_prompt->is_array())
{
const auto n_tokens = penalty_prompt->size();
slot->sparams.penalty_prompt_tokens.reserve(n_tokens + std::max(0, slot->params.n_predict));
const int n_vocab = llama_n_vocab(model);
for (const auto &penalty_token : *penalty_prompt)
{
if (penalty_token.is_number_integer())
{
const auto tok = penalty_token.get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.penalty_prompt_tokens.push_back(tok);
}
}
}
slot->sparams.use_penalty_prompt_tokens = true;
}
}
slot->sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false))
@@ -821,24 +864,17 @@ struct llama_server_context
{
for (const auto &img : *images_data)
{
std::string data_b64 = img["data"].get<std::string>();
const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>());
slot_image img_sl;
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
int width, height, channels;
std::vector<uint8_t> image_buffer = base64_decode(data_b64);
data_b64.clear();
auto data = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &width, &height, &channels, 3);
if (!data) {
img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{
LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id);
return false;
}
LOG_TEE("slot %i - image loaded [id: %i] resolution (%i x %i)\n", slot->id, img_sl.id, width, height);
img_sl.img_data.nx = width;
img_sl.img_data.ny = height;
img_sl.img_data.size = width * height * 3;
img_sl.img_data.data = new uint8_t[width * height * 3]();
memcpy(img_sl.img_data.data, data, width * height * 3);
stbi_image_free(data);
LOG_TEE("slot %i - loaded image\n", slot->id);
img_sl.request_encode_image = true;
slot->images.push_back(img_sl);
}
@@ -893,6 +929,7 @@ struct llama_server_context
llama_sampling_free(slot->ctx_sampling);
}
slot->ctx_sampling = llama_sampling_init(slot->sparams);
llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT;
all_slots_are_idle = false;
@@ -1000,6 +1037,12 @@ struct llama_server_context
slot.generated_text += token_str;
slot.has_next_token = true;
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
{
// we can change penalty_prompt_tokens because it is always created from scratch each request
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
}
// check if there is incomplete UTF-8 character at the end
bool incomplete = false;
for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i)
@@ -1106,8 +1149,8 @@ struct llama_server_context
{
continue;
}
clip_image_f32 img_res;
if (!clip_image_preprocess(clp_ctx, &img.img_data, &img_res, /*pad2square =*/ true))
clip_image_f32 * img_res = clip_image_f32_init();
if (!clip_image_preprocess(clp_ctx, img.img_data, img_res, /*pad2square =*/ true))
{
LOG_TEE("Error processing the given image");
clip_free(clp_ctx);
@@ -1122,11 +1165,12 @@ struct llama_server_context
return false;
}
LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id);
if (!clip_image_encode(clp_ctx, params.n_threads, &img_res, img.image_embedding))
if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
{
LOG_TEE("Unable to encode image\n");
return false;
}
clip_image_f32_free(img_res);
img.request_encode_image = false;
}
@@ -1135,7 +1179,7 @@ struct llama_server_context
void send_error(task_server& task, std::string error)
{
std::lock_guard<std::mutex> lock(mutex_results);
std::unique_lock<std::mutex> lock(mutex_results);
task_result res;
res.id = task.id;
res.multitask_id = task.multitask_id;
@@ -1143,6 +1187,7 @@ struct llama_server_context
res.error = true;
res.result_json = { { "content", error } };
queue_results.push_back(res);
condition_results.notify_all();
}
void add_multi_task(int id, std::vector<int>& sub_ids)
@@ -1152,6 +1197,7 @@ struct llama_server_context
multi.id = id;
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
queue_multitasks.push_back(multi);
condition_tasks.notify_one();
}
void update_multi_task(int multitask_id, int subtask_id, task_result& result)
@@ -1163,6 +1209,7 @@ struct llama_server_context
{
multitask.subtasks_remaining.erase(subtask_id);
multitask.results.push_back(result);
condition_tasks.notify_one();
}
}
}
@@ -1181,7 +1228,7 @@ struct llama_server_context
{"n_ctx", slot.n_ctx},
{"model", params.model_alias},
{"seed", slot.params.seed},
{"temp", slot.sparams.temp},
{"temperature", slot.sparams.temp},
{"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
@@ -1191,6 +1238,8 @@ struct llama_server_context
{"repeat_penalty", slot.sparams.penalty_repeat},
{"presence_penalty", slot.sparams.penalty_present},
{"frequency_penalty", slot.sparams.penalty_freq},
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
@@ -1208,7 +1257,7 @@ struct llama_server_context
void send_partial_response(llama_client_slot &slot, completion_token_output tkn)
{
std::lock_guard<std::mutex> lock(mutex_results);
std::unique_lock<std::mutex> lock(mutex_results);
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
@@ -1244,11 +1293,12 @@ struct llama_server_context
}
queue_results.push_back(res);
condition_results.notify_all();
}
void send_final_response(llama_client_slot &slot)
{
std::lock_guard<std::mutex> lock(mutex_results);
std::unique_lock<std::mutex> lock(mutex_results);
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
@@ -1304,11 +1354,12 @@ struct llama_server_context
}
queue_results.push_back(res);
condition_results.notify_all();
}
void send_embedding(llama_client_slot &slot)
{
std::lock_guard<std::mutex> lock(mutex_results);
std::unique_lock<std::mutex> lock(mutex_results);
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
@@ -1336,6 +1387,7 @@ struct llama_server_context
};
}
queue_results.push_back(res);
condition_results.notify_all();
}
int request_completion(json data, bool infill, bool embedding, int multitask_id)
@@ -1359,6 +1411,7 @@ struct llama_server_context
// otherwise, it's a single-prompt task, we actually queue it
queue_tasks.push_back(task);
condition_tasks.notify_one();
return task.id;
}
@@ -1366,13 +1419,10 @@ struct llama_server_context
{
while (true)
{
std::this_thread::sleep_for(std::chrono::microseconds(5));
std::lock_guard<std::mutex> lock(mutex_results);
if (queue_results.empty())
{
continue;
}
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
return !queue_results.empty();
});
for (int i = 0; i < (int) queue_results.size(); i++)
{
@@ -1468,12 +1518,13 @@ struct llama_server_context
void request_cancel(int task_id)
{
std::lock_guard<std::mutex> lock(mutex_tasks);
std::unique_lock<std::mutex> lock(mutex_tasks);
task_server task;
task.id = id_gen++;
task.type = CANCEL_TASK;
task.target_id = task_id;
queue_tasks.push_back(task);
condition_tasks.notify_one();
}
int split_multiprompt_task(task_server& multiprompt_task)
@@ -1499,7 +1550,7 @@ struct llama_server_context
void process_tasks()
{
std::lock_guard<std::mutex> lock(mutex_tasks);
std::unique_lock<std::mutex> lock(mutex_tasks);
while (!queue_tasks.empty())
{
task_server task = queue_tasks.front();
@@ -1571,6 +1622,7 @@ struct llama_server_context
std::lock_guard<std::mutex> lock(mutex_results);
queue_results.push_back(aggregate_result);
condition_results.notify_all();
queue_iterator = queue_multitasks.erase(queue_iterator);
}
@@ -1601,8 +1653,10 @@ struct llama_server_context
LOG_TEE("all slots are idle and system prompt is empty, clear the KV cache\n");
kv_cache_clear();
}
// avoid 100% usage of cpu all time
std::this_thread::sleep_for(std::chrono::milliseconds(5));
std::unique_lock<std::mutex> lock(mutex_tasks);
condition_tasks.wait(lock, [&]{
return !queue_tasks.empty();
});
}
for (llama_client_slot &slot : slots)
@@ -1962,28 +2016,35 @@ json oaicompat_completion_params_parse(
llama_params["__oaicompat"] = true;
// Map OpenAI parameters to llama.cpp parameters
//
// For parameters that are defined by the OpenAI documentation (e.g.
// temperature), we explicitly specify OpenAI's intended default; we
// need to do that because sometimes OpenAI disagrees with llama.cpp
//
// https://platform.openai.com/docs/api-reference/chat/create
llama_sampling_params default_sparams;
llama_params["model"] = json_value(body, "model", std::string("uknown"));
llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
llama_params["temperature"] = json_value(body, "temperature", 0.8);
llama_params["top_k"] = json_value(body, "top_k", 40);
llama_params["top_p"] = json_value(body, "top_p", 0.95);
llama_params["temperature"] = json_value(body, "temperature", 0.0);
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
llama_params["top_p"] = json_value(body, "top_p", 1.0);
llama_params["n_predict"] = json_value(body, "max_tokens", -1);
llama_params["logit_bias"] = json_value(body, "logit_bias",json::object());
llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0);
llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0);
llama_params["seed"] = json_value(body, "seed", 0);
llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED);
llama_params["stream"] = json_value(body, "stream", false);
llama_params["mirostat"] = json_value(body, "mirostat", false);
llama_params["mirostat_tau"] = json_value(body, "mirostat_tau", 0.0);
llama_params["mirostat_eta"] = json_value(body, "mirostat_eta", 0.0);
llama_params["penalize_nl"] = json_value(body, "penalize_nl", false);
llama_params["typical_p"] = json_value(body, "typical_p", 0.0);
llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", 0);
llama_params["mirostat"] = json_value(body, "mirostat", default_sparams.mirostat);
llama_params["mirostat_tau"] = json_value(body, "mirostat_tau", default_sparams.mirostat_tau);
llama_params["mirostat_eta"] = json_value(body, "mirostat_eta", default_sparams.mirostat_eta);
llama_params["penalize_nl"] = json_value(body, "penalize_nl", default_sparams.penalize_nl);
llama_params["typical_p"] = json_value(body, "typical_p", default_sparams.typical_p);
llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", default_sparams.penalty_last_n);
llama_params["ignore_eos"] = json_value(body, "ignore_eos", false);
llama_params["tfs_z"] = json_value(body, "tfs_z", 0.0);
llama_params["tfs_z"] = json_value(body, "tfs_z", default_sparams.tfs_z);
if (llama_params.count("grammar") != 0) {
if (body.count("grammar") != 0) {
llama_params["grammar"] = json_value(body, "grammar", json::object());
}

View File

@@ -53,6 +53,7 @@ dependencies:
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- omegaconf
- packaging==23.2
- pillow==10.0.1
- protobuf==4.24.4

View File

@@ -11,4 +11,6 @@ source activate exllama
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR
python $DIR/exllama.py $@

View File

@@ -11,4 +11,6 @@ source activate exllama2
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR
python $DIR/exllama2_backend.py $@

View File

@@ -10,4 +10,6 @@ source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR
python $DIR/ttsvalle.py $@

View File

@@ -36,10 +36,10 @@ In a nutshell:
- Local, OpenAI drop-in alternative REST API. You own your data.
- NO GPU required. NO Internet access is required either
- Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
- Optional, GPU Acceleration is available. See also the [build section](https://localai.io/basics/build/index.html).
- Supports multiple models
- 🏃 Once loaded the first time, it keeps models loaded in memory for faster inference
- ⚡ Doesn't shell-out, but uses C++ bindings for a faster inference and better performance.
- ⚡ Doesn't shell-out, but uses bindings for a faster inference and better performance.
LocalAI is focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!

View File

@@ -359,15 +359,7 @@ docker run --env REBUILD=true localai
docker run --env-file .env localai
```
### Build only a single backend
You can control the backends that are built by setting the `GRPC_BACKENDS` environment variable. For instance, to build only the `llama-cpp` backend:
```bash
make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
```
By default, all the backends are built.
### Extra backends

View File

@@ -7,16 +7,15 @@ url = '/basics/build/'
+++
### Build locally
### Build
#### Container image
Requirements:
Either Docker/podman, or
- Golang >= 1.21
- Cmake/make
- GCC
- Docker or podman, or a container engine
In order to build the `LocalAI` container image locally you can use `docker`:
In order to build the `LocalAI` container image locally you can use `docker`, for example:
```
# build the image
@@ -24,7 +23,45 @@ docker build -t localai .
docker run localai
```
Or you can build the binary manually with `make`:
#### Locally
In order to build LocalAI locally, you need the following requirements:
- Golang >= 1.21
- Cmake/make
- GCC
- GRPC
To install the dependencies follow the instructions below:
{{< tabs >}}
{{% tab name="Apple" %}}
```bash
brew install abseil cmake go grpc protobuf wget
```
{{% /tab %}}
{{% tab name="Debian" %}}
```bash
apt install protobuf-compiler-grpc libgrpc-dev make cmake
```
{{% /tab %}}
{{% tab name="From source" %}}
Specify `BUILD_GRPC_FOR_BACKEND_LLAMA=true` to automatically build the gRPC dependencies
```bash
make ... BUILD_GRPC_FOR_BACKEND_LLAMA=true build
```
{{% /tab %}}
{{< /tabs >}}
To build LocalAI with `make`:
```
git clone https://github.com/go-skynet/LocalAI
@@ -32,7 +69,7 @@ cd LocalAI
make build
```
To run: `./local-ai`
This should produce the binary `local-ai`
{{% notice note %}}
@@ -54,7 +91,7 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS
{{% /notice %}}
### Build on mac
### Example: Build on mac
Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
@@ -188,6 +225,16 @@ make BUILD_TYPE=metal build
# Note: only models quantized with q4_0 are supported!
```
### Build only a single backend
You can control the backends that are built by setting the `GRPC_BACKENDS` environment variable. For instance, to build only the `llama-cpp` backend:
```bash
make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
```
By default, all the backends are built.
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2

View File

@@ -1,3 +1,3 @@
{
"version": "v2.2.0"
"version": "v2.3.1"
}

View File

@@ -67,6 +67,17 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
```
### Phi-2
```
cp -r examples/configurations/phi-2.yaml models/
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
```
### Mixtral
```

View File

@@ -0,0 +1,17 @@
name: phi-2
context_size: 2048
f16: true
gpu_layers: 90
mmap: true
trimsuffix:
- "\n"
parameters:
model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat: &template |
Instruct: {{.Input}}
Output:
completion: *template

View File

@@ -99,6 +99,11 @@ func main() {
Usage: "A List of models to apply in JSON at start",
EnvVars: []string{"PRELOAD_MODELS"},
},
&cli.StringFlag{
Name: "models",
Usage: "A List of models URLs configurations.",
EnvVars: []string{"MODELS"},
},
&cli.StringFlag{
Name: "preload-models-config",
Usage: "A List of models to apply at startup. Path to a YAML config file",
@@ -222,6 +227,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
options.WithBackendAssetsOutput(ctx.String("backend-assets-path")),
options.WithUploadLimitMB(ctx.Int("upload-limit")),
options.WithApiKeys(ctx.StringSlice("api-keys")),
options.WithModelsURL(append(ctx.StringSlice("models"), ctx.Args().Slice()...)...),
}
idleWatchDog := ctx.Bool("enable-watchdog-idle")

View File

@@ -239,10 +239,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
for _, b := range o.externalBackends {
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
}
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
log.Info().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
for _, b := range allBackendsToAutoLoad {
log.Debug().Msgf("[%s] Attempting to load", b)
log.Info().Msgf("[%s] Attempting to load", b)
options := []Option{
WithBackendString(b),
WithModel(o.model),
@@ -257,14 +257,14 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
model, modelerr := ml.BackendLoader(options...)
if modelerr == nil && model != nil {
log.Debug().Msgf("[%s] Loads OK", b)
log.Info().Msgf("[%s] Loads OK", b)
return model, nil
} else if modelerr != nil {
err = multierror.Append(err, modelerr)
log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error())
} else if model == nil {
err = multierror.Append(err, fmt.Errorf("backend returned no usable model"))
log.Debug().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
}
}

View File

@@ -29,9 +29,9 @@ func DisplayDownloadFunction(fileName string, current string, total string, perc
}
if total != "" {
log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%) ETA: %s", fileName, current, total, percentage, eta)
log.Info().Msgf("Downloading %s: %s/%s (%.2f%%) ETA: %s", fileName, current, total, percentage, eta)
} else {
log.Debug().Msgf("Downloading: %s", current)
log.Info().Msgf("Downloading: %s", current)
}
}
}

View File

@@ -15,27 +15,8 @@ import (
"github.com/rs/zerolog/log"
)
const (
githubURI = "github:"
)
func GetURI(url string, f func(url string, i []byte) error) error {
if strings.HasPrefix(url, githubURI) {
parts := strings.Split(url, ":")
repoParts := strings.Split(parts[1], "@")
branch := "main"
if len(repoParts) > 1 {
branch = repoParts[1]
}
repoPath := strings.Split(repoParts[0], "/")
org := repoPath[0]
project := repoPath[1]
projectPath := strings.Join(repoPath[2:], "/")
url = fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
}
url = ConvertURL(url)
if strings.HasPrefix(url, "file://") {
rawURL := strings.TrimPrefix(url, "file://")
@@ -71,10 +52,57 @@ func GetURI(url string, f func(url string, i []byte) error) error {
return f(url, body)
}
const (
HuggingFacePrefix = "huggingface://"
HTTPPrefix = "http://"
HTTPSPrefix = "https://"
GithubURI = "github:"
GithubURI2 = "github://"
)
func LooksLikeURL(s string) bool {
return strings.HasPrefix(s, HTTPPrefix) ||
strings.HasPrefix(s, HTTPSPrefix) ||
strings.HasPrefix(s, HuggingFacePrefix) ||
strings.HasPrefix(s, GithubURI) ||
strings.HasPrefix(s, GithubURI2)
}
func ConvertURL(s string) string {
switch {
case strings.HasPrefix(s, "huggingface://"):
repository := strings.Replace(s, "huggingface://", "", 1)
case strings.HasPrefix(s, GithubURI2):
repository := strings.Replace(s, GithubURI2, "", 1)
repoParts := strings.Split(repository, "@")
branch := "main"
if len(repoParts) > 1 {
branch = repoParts[1]
}
repoPath := strings.Split(repoParts[0], "/")
org := repoPath[0]
project := repoPath[1]
projectPath := strings.Join(repoPath[2:], "/")
return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
case strings.HasPrefix(s, GithubURI):
parts := strings.Split(s, ":")
repoParts := strings.Split(parts[1], "@")
branch := "main"
if len(repoParts) > 1 {
branch = repoParts[1]
}
repoPath := strings.Split(repoParts[0], "/")
org := repoPath[0]
project := repoPath[1]
projectPath := strings.Join(repoPath[2:], "/")
return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
case strings.HasPrefix(s, HuggingFacePrefix):
repository := strings.Replace(s, HuggingFacePrefix, "", 1)
// convert repository to a full URL.
// e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
owner := strings.Split(repository, "/")[0]