mirror of
https://github.com/ollama/ollama.git
synced 2025-12-24 08:10:54 -05:00
Compare commits
25 Commits
jmorganca/
...
bmizerany/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9ea2e5c7a | ||
|
|
5ce997a7b9 | ||
|
|
672ffe9b7d | ||
|
|
47cfe58af5 | ||
|
|
e72c567cfd | ||
|
|
3e22611200 | ||
|
|
a54d4a28dc | ||
|
|
82b0c7c27e | ||
|
|
ba7cf7fb66 | ||
|
|
2f804068bd | ||
|
|
34d00f90b1 | ||
|
|
b53229a2ed | ||
|
|
53c107e20e | ||
|
|
51578d8573 | ||
|
|
b5fcd9d3aa | ||
|
|
b80661e8c7 | ||
|
|
6d3adfbea2 | ||
|
|
369eda65f5 | ||
|
|
f878e91070 | ||
|
|
0d651478e4 | ||
|
|
9ea492f1ce | ||
|
|
bc13da2bfe | ||
|
|
41b00b9856 | ||
|
|
c2a8ed48e7 | ||
|
|
3dc1bb6a35 |
@@ -92,7 +92,7 @@ COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/b
|
||||
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/deps/
|
||||
ARG GOFLAGS
|
||||
ARG CGO_CFLAGS
|
||||
RUN go build .
|
||||
RUN go build -trimpath .
|
||||
|
||||
# Intermediate stage used for ./scripts/build_linux.sh
|
||||
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||
@@ -103,7 +103,7 @@ COPY . .
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||
ARG GOFLAGS
|
||||
ARG CGO_CFLAGS
|
||||
RUN go build .
|
||||
RUN go build -trimpath .
|
||||
|
||||
# Runtime stages
|
||||
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
|
||||
|
||||
50
api/types_test.go
Normal file
50
api/types_test.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestKeepAliveParsingFromJSON(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req string
|
||||
exp *Duration
|
||||
}{
|
||||
{
|
||||
name: "Positive Integer",
|
||||
req: `{ "keep_alive": 42 }`,
|
||||
exp: &Duration{42 * time.Second},
|
||||
},
|
||||
{
|
||||
name: "Positive Integer String",
|
||||
req: `{ "keep_alive": "42m" }`,
|
||||
exp: &Duration{42 * time.Minute},
|
||||
},
|
||||
{
|
||||
name: "Negative Integer",
|
||||
req: `{ "keep_alive": -1 }`,
|
||||
exp: &Duration{math.MaxInt64},
|
||||
},
|
||||
{
|
||||
name: "Negative Integer String",
|
||||
req: `{ "keep_alive": "-1m" }`,
|
||||
exp: &Duration{math.MaxInt64},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
var dec ChatRequest
|
||||
err := json.Unmarshal([]byte(test.req), &dec)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, test.exp, dec.KeepAlive)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -970,9 +970,10 @@ func NewCLI() *cobra.Command {
|
||||
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
|
||||
Environment Variables:
|
||||
|
||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
||||
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
||||
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
||||
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
|
||||
OLLAMA_ORIGINS A comma separated list of allowed origins.
|
||||
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
|
||||
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default is "5m")
|
||||
`)
|
||||
|
||||
pullCmd := &cobra.Command{
|
||||
|
||||
@@ -103,9 +103,9 @@ func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
|
||||
return []llm.Tensor{}, 0, err
|
||||
}
|
||||
|
||||
shape := [4]uint64{1, 1, 1, 1}
|
||||
for cnt, s := range data.Shape {
|
||||
shape[cnt] = uint64(s)
|
||||
shape := []uint64{0, 0, 0, 0}
|
||||
for i := range data.Shape {
|
||||
shape[i] = uint64(data.Shape[i])
|
||||
}
|
||||
|
||||
t := llm.Tensor{
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
### Getting Started
|
||||
* [Quickstart](../README.md#quickstart)
|
||||
* [Examples](../examples)
|
||||
* [Importing models](./import.md) from GGUF, Pytorch and Safetensors
|
||||
* [Importing models](./import.md)
|
||||
* [Linux Documentation](./linux.md)
|
||||
* [Windows Documentation](./windows.md)
|
||||
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
|
||||
|
||||
10
docs/faq.md
10
docs/faq.md
@@ -193,3 +193,13 @@ To unload the model and free up memory use:
|
||||
```shell
|
||||
curl http://localhost:11434/api/generate -d '{"model": "llama2", "keep_alive": 0}'
|
||||
```
|
||||
|
||||
## Controlling which GPUs to use
|
||||
|
||||
By default, on Linux and Windows, Ollama will attempt to use Nvidia GPUs, or
|
||||
Radeon GPUs, and will use all the GPUs it can find. You can limit which GPUs
|
||||
will be utilized by setting the environment variable `CUDA_VISIBLE_DEVICES` for
|
||||
NVIDIA cards, or `HIP_VISIBLE_DEVICES` for Radeon GPUs to a comma delimited list
|
||||
of GPU IDs. You can see the list of devices with GPU tools such as `nvidia-smi` or
|
||||
`rocminfo`. You can set to an invalid GPU ID (e.g., "-1") to bypass the GPU and
|
||||
fallback to CPU.
|
||||
@@ -131,7 +131,7 @@ The `PARAMETER` instruction defines a parameter that can be set when the model i
|
||||
PARAMETER <parameter> <parametervalue>
|
||||
```
|
||||
|
||||
### Valid Parameters and Values
|
||||
#### Valid Parameters and Values
|
||||
|
||||
| Parameter | Description | Value Type | Example Usage |
|
||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- |
|
||||
@@ -201,7 +201,22 @@ LICENSE """
|
||||
|
||||
### MESSAGE
|
||||
|
||||
The `MESSAGE` instruction allows you to specify a message history for the model to use when responding:
|
||||
The `MESSAGE` instruction allows you to specify a message history for the model to use when responding. Use multiple iterations of the MESSAGE command to build up a conversation which will guide the model to answer in a similar way.
|
||||
|
||||
```modelfile
|
||||
MESSAGE <role> <message>
|
||||
```
|
||||
|
||||
#### Valid roles
|
||||
|
||||
| Role | Description |
|
||||
| --------- | ------------------------------------------------------------ |
|
||||
| system | Alternate way of providing the SYSTEM message for the model. |
|
||||
| user | An example message of what the user could have asked. |
|
||||
| assistant | An example message of how the model should respond. |
|
||||
|
||||
|
||||
#### Example conversation
|
||||
|
||||
```modelfile
|
||||
MESSAGE user Is Toronto in Canada?
|
||||
@@ -212,6 +227,7 @@ MESSAGE user Is Ontario in Canada?
|
||||
MESSAGE assistant yes
|
||||
```
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
- the **`Modelfile` is not case sensitive**. In the examples, uppercase instructions are used to make it easier to distinguish it from arguments.
|
||||
|
||||
@@ -109,7 +109,3 @@ which version to install.
|
||||
```sh
|
||||
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.27" sh
|
||||
```
|
||||
|
||||
## Known issues
|
||||
|
||||
* N/A
|
||||
@@ -40,19 +40,17 @@ func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
|
||||
// TODO - does sort order matter?
|
||||
devices := []string{}
|
||||
for i := range ids {
|
||||
slog.Debug(fmt.Sprintf("i=%d", i))
|
||||
if _, skipped := skip[i]; skipped {
|
||||
slog.Debug("skipped")
|
||||
continue
|
||||
}
|
||||
devices = append(devices, strconv.Itoa(i))
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("devices=%v", devices))
|
||||
|
||||
val := strings.Join(devices, ",")
|
||||
err := os.Setenv("HIP_VISIBLE_DEVICES", val)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to set env: %s", err))
|
||||
} else {
|
||||
slog.Info("Setting HIP_VISIBLE_DEVICES=" + val)
|
||||
}
|
||||
slog.Debug("HIP_VISIBLE_DEVICES=" + val)
|
||||
}
|
||||
|
||||
@@ -24,6 +24,9 @@ const (
|
||||
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
|
||||
GPUUsedMemoryFileGlob = "mem_banks/*/used_memory"
|
||||
RocmStandardLocation = "/opt/rocm/lib"
|
||||
|
||||
// TODO find a better way to detect iGPU instead of minimum memory
|
||||
IGPUMemLimit = 1024 * 1024 * 1024 // 512G is what they typically report, so anything less than 1G must be iGPU
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -146,8 +149,8 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
resp.memInfo.DeviceCount = 0
|
||||
resp.memInfo.TotalMemory = 0
|
||||
resp.memInfo.FreeMemory = 0
|
||||
slog.Debug("discovering VRAM for amdgpu devices")
|
||||
if len(ids) == 0 {
|
||||
slog.Debug("discovering all amdgpu devices")
|
||||
entries, err := os.ReadDir(AMDNodesSysfsDir)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err))
|
||||
@@ -165,7 +168,7 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("discovering amdgpu devices %v", ids))
|
||||
slog.Debug(fmt.Sprintf("amdgpu devices %v", ids))
|
||||
|
||||
for _, id := range ids {
|
||||
if _, skipped := skip[id]; skipped {
|
||||
@@ -173,7 +176,8 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
}
|
||||
totalMemory := uint64(0)
|
||||
usedMemory := uint64(0)
|
||||
propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUTotalMemoryFileGlob)
|
||||
// Adjust for sysfs vs HIP ids
|
||||
propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id+1), GPUTotalMemoryFileGlob)
|
||||
propFiles, err := filepath.Glob(propGlob)
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err))
|
||||
@@ -205,6 +209,13 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
}
|
||||
}
|
||||
if totalMemory == 0 {
|
||||
slog.Warn(fmt.Sprintf("amdgpu [%d] reports zero total memory, skipping", id))
|
||||
skip[id] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if totalMemory < IGPUMemLimit {
|
||||
slog.Info(fmt.Sprintf("amdgpu [%d] appears to be an iGPU with %dM reported total memory, skipping", id, totalMemory/1024/1024))
|
||||
skip[id] = struct{}{}
|
||||
continue
|
||||
}
|
||||
usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob)
|
||||
@@ -232,8 +243,8 @@ func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
|
||||
}
|
||||
usedMemory += used
|
||||
}
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %d", id, totalMemory))
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory %d", id, (totalMemory - usedMemory)))
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %dM", id, totalMemory/1024/1024))
|
||||
slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory %dM", id, (totalMemory-usedMemory)/1024/1024))
|
||||
resp.memInfo.DeviceCount++
|
||||
resp.memInfo.TotalMemory += totalMemory
|
||||
resp.memInfo.FreeMemory += (totalMemory - usedMemory)
|
||||
@@ -282,7 +293,7 @@ func AMDValidateLibDir() (string, error) {
|
||||
}
|
||||
|
||||
// If we already have a rocm dependency wired, nothing more to do
|
||||
rocmTargetDir := filepath.Join(payloadsDir, "rocm")
|
||||
rocmTargetDir := filepath.Clean(filepath.Join(payloadsDir, "..", "rocm"))
|
||||
if rocmLibUsable(rocmTargetDir) {
|
||||
return rocmTargetDir, nil
|
||||
}
|
||||
@@ -358,6 +369,8 @@ func AMDDriverVersion() (string, error) {
|
||||
}
|
||||
|
||||
func AMDGFXVersions() map[int]Version {
|
||||
// The amdgpu driver always exposes the host CPU as node 0, but we have to skip that and subtract one
|
||||
// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
|
||||
res := map[int]Version{}
|
||||
matches, _ := filepath.Glob(GPUPropertiesFileGlob)
|
||||
for _, match := range matches {
|
||||
@@ -373,17 +386,20 @@ func AMDGFXVersions() map[int]Version {
|
||||
continue
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
// Skipping the CPU
|
||||
continue
|
||||
}
|
||||
// Align with HIP IDs (zero is first GPU, not CPU)
|
||||
i -= 1
|
||||
|
||||
scanner := bufio.NewScanner(fp)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, "gfx_target_version") {
|
||||
ver := strings.Fields(line)
|
||||
if len(ver) != 2 || len(ver[1]) < 5 {
|
||||
|
||||
if ver[1] == "0" {
|
||||
// Silently skip the CPU
|
||||
continue
|
||||
} else {
|
||||
if ver[1] != "0" {
|
||||
slog.Debug("malformed " + line)
|
||||
}
|
||||
res[i] = Version{
|
||||
|
||||
@@ -23,7 +23,9 @@ func PayloadsDir() (string, error) {
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
||||
}
|
||||
payloadsDir = tmpDir
|
||||
// We create a distinct subdirectory for payloads within the tmpdir
|
||||
// This will typically look like /tmp/ollama3208993108/runners on linux
|
||||
payloadsDir = filepath.Join(tmpDir, "runners")
|
||||
}
|
||||
return payloadsDir, nil
|
||||
}
|
||||
@@ -32,10 +34,12 @@ func Cleanup() {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
if payloadsDir != "" {
|
||||
slog.Debug("cleaning up", "dir", payloadsDir)
|
||||
err := os.RemoveAll(payloadsDir)
|
||||
// We want to fully clean up the tmpdir parent of the payloads dir
|
||||
tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
|
||||
slog.Debug("cleaning up", "dir", tmpDir)
|
||||
err := os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
slog.Warn("failed to clean up", "dir", payloadsDir, "err", err)
|
||||
slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -155,8 +155,8 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
}
|
||||
}
|
||||
|
||||
LOG(h.verbose, "[%d] CUDA totalMem %ld\n", i, memInfo.total);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %ld\n", i, memInfo.used);
|
||||
LOG(h.verbose, "[%d] CUDA totalMem %llu\n", i, memInfo.total);
|
||||
LOG(h.verbose, "[%d] CUDA usedMem %llu\n", i, memInfo.used);
|
||||
|
||||
resp->total += memInfo.total;
|
||||
resp->free += memInfo.free;
|
||||
|
||||
@@ -149,7 +149,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||
|
||||
slog.Info("Initializing llama server")
|
||||
slog.Debug(fmt.Sprintf("server params: %+v", sparams))
|
||||
initResp := newExtServerResp(128)
|
||||
initResp := newExtServerResp(512)
|
||||
defer freeExtServerResp(initResp)
|
||||
C.dyn_llama_server_init(llm.s, &sparams, &initResp)
|
||||
if initResp.id < 0 {
|
||||
@@ -198,6 +198,9 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
|
||||
if predict.Format == "json" {
|
||||
request["grammar"] = jsonGrammar
|
||||
if !strings.Contains(strings.ToLower(predict.Prompt), "json") {
|
||||
slog.Warn("Prompt does not specify that the LLM should response in JSON, but JSON format is expected. For best results specify that JSON is expected in the system prompt.")
|
||||
}
|
||||
}
|
||||
|
||||
retryDelay := 100 * time.Microsecond
|
||||
@@ -225,17 +228,14 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
}
|
||||
|
||||
retryNeeded := false
|
||||
// keep track of the last token generated, this is used to abort if the model starts looping
|
||||
var lastToken string
|
||||
var tokenRepeat int
|
||||
out:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// This handles the request cancellation
|
||||
C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp)
|
||||
if resp.id < 0 {
|
||||
return extServerResponseToErr(resp)
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
return cancelCompletion(llm, resp)
|
||||
default:
|
||||
var result C.ext_server_task_result_t
|
||||
C.dyn_llama_server_completion_next_result(llm.s, resp.id, &result)
|
||||
@@ -258,6 +258,20 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
break out
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.TrimSpace(p.Content) == lastToken:
|
||||
tokenRepeat++
|
||||
default:
|
||||
lastToken = strings.TrimSpace(p.Content)
|
||||
tokenRepeat = 0
|
||||
}
|
||||
|
||||
// 30 picked as an arbitrary max token repeat limit, modify as needed
|
||||
if tokenRepeat > 30 {
|
||||
slog.Debug("prediction aborted, token repeat limit reached")
|
||||
return cancelCompletion(llm, resp)
|
||||
}
|
||||
|
||||
if p.Content != "" {
|
||||
fn(PredictResult{
|
||||
Content: p.Content,
|
||||
@@ -285,6 +299,15 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||
return fmt.Errorf("max retries exceeded")
|
||||
}
|
||||
|
||||
func cancelCompletion(llm *dynExtServer, resp C.ext_server_resp_t) error {
|
||||
C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp)
|
||||
if resp.id < 0 {
|
||||
return extServerResponseToErr(resp)
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *dynExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
|
||||
data, err := json.Marshal(TokenizeRequest{Content: prompt})
|
||||
if err != nil {
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#endif // GGML_USE_CUBLAS
|
||||
|
||||
// Expose the llama server as a callable extern "C" API
|
||||
server_context *llama = NULL;
|
||||
llama_server_context *llama = NULL;
|
||||
std::thread ext_server_thread;
|
||||
bool shutting_down = false;
|
||||
std::atomic_int recv_counter;
|
||||
@@ -57,7 +57,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
err->id = 0;
|
||||
err->msg[0] = '\0';
|
||||
try {
|
||||
llama = new server_context;
|
||||
llama = new llama_server_context;
|
||||
gpt_params params;
|
||||
params.n_ctx = sparams->n_ctx;
|
||||
params.n_batch = sparams->n_batch;
|
||||
@@ -114,18 +114,14 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
// load the model
|
||||
if (!llama->load_model(params)) {
|
||||
// TODO - consider modifying the logging logic or patching load_model so
|
||||
// we can capture more detailed error messages and pass them back to the
|
||||
// caller for better UX
|
||||
err->id = -1;
|
||||
snprintf(err->msg, err->msg_len, "error loading model %s",
|
||||
params.model.c_str());
|
||||
return;
|
||||
}
|
||||
if (!llama->load_model(params)) {
|
||||
// an error occurred that was not thrown
|
||||
err->id = -1;
|
||||
snprintf(err->msg, err->msg_len, "error loading model %s", params.model.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
llama->init();
|
||||
llama->initialize();
|
||||
} catch (std::exception &e) {
|
||||
err->id = -1;
|
||||
snprintf(err->msg, err->msg_len, "exception %s", e.what());
|
||||
@@ -144,13 +140,13 @@ void llama_server_start() {
|
||||
LOG_TEE("llama server main loop starting\n");
|
||||
ggml_time_init();
|
||||
llama->queue_tasks.on_new_task(std::bind(
|
||||
&server_context::process_single_task, llama, std::placeholders::_1));
|
||||
&llama_server_context::process_single_task, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_finish_multitask(std::bind(
|
||||
&server_context::on_finish_multitask, llama, std::placeholders::_1));
|
||||
&llama_server_context::on_finish_multitask, llama, std::placeholders::_1));
|
||||
llama->queue_tasks.on_run_slots(std::bind(
|
||||
&server_context::update_slots, llama));
|
||||
&llama_server_context::update_slots, llama));
|
||||
llama->queue_results.on_multitask_update(std::bind(
|
||||
&server_queue::update_multitask,
|
||||
&llama_server_queue::update_multitask,
|
||||
&llama->queue_tasks,
|
||||
std::placeholders::_1,
|
||||
std::placeholders::_2,
|
||||
@@ -198,7 +194,7 @@ void llama_server_completion(const char *json_req, ext_server_resp_t *resp) {
|
||||
json data = json::parse(json_req);
|
||||
resp->id = llama->queue_tasks.get_new_id();
|
||||
llama->queue_results.add_waiting_task_id(resp->id);
|
||||
llama->request_completion(resp->id, -1, data, false, false);
|
||||
llama->request_completion(resp->id, data, false, false, -1);
|
||||
} catch (std::exception &e) {
|
||||
snprintf(resp->msg, resp->msg_len, "exception %s", e.what());
|
||||
} catch (...) {
|
||||
@@ -216,9 +212,9 @@ void llama_server_completion_next_result(const int task_id,
|
||||
std::string result_json;
|
||||
try {
|
||||
atomicRecv ar(recv_counter);
|
||||
server_task_result result = llama->queue_results.recv(task_id);
|
||||
task_result result = llama->queue_results.recv(task_id);
|
||||
result_json =
|
||||
result.data.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
resp->id = result.id;
|
||||
resp->stop = result.stop;
|
||||
resp->error = result.error;
|
||||
@@ -363,10 +359,10 @@ void llama_server_embedding(const char *json_req, char **json_resp,
|
||||
}
|
||||
const int task_id = llama->queue_tasks.get_new_id();
|
||||
llama->queue_results.add_waiting_task_id(task_id);
|
||||
llama->request_completion(task_id, -1, {{"prompt", prompt}, {"n_predict", 0}}, false, true);
|
||||
llama->request_completion(task_id, {{"prompt", prompt}, {"n_predict", 0}}, false, true, -1);
|
||||
atomicRecv ar(recv_counter);
|
||||
server_task_result result = llama->queue_results.recv(task_id);
|
||||
std::string result_json = result.data.dump();
|
||||
task_result result = llama->queue_results.recv(task_id);
|
||||
std::string result_json = result.result_json.dump();
|
||||
const std::string::size_type size = result_json.size() + 1;
|
||||
*json_resp = new char[size];
|
||||
snprintf(*json_resp, size, "%s", result_json.c_str());
|
||||
|
||||
@@ -18,19 +18,6 @@ sign() {
|
||||
fi
|
||||
}
|
||||
|
||||
# bundle_metal bundles ggml-common.h and ggml-metal.metal into a single file
|
||||
bundle_metal() {
|
||||
grep -v '#include "ggml-common.h"' "${LLAMACPP_DIR}/ggml-metal.metal" | grep -v '#pragma once' > "${LLAMACPP_DIR}/ggml-metal.metal.temp"
|
||||
echo '#define GGML_COMMON_IMPL_METAL' > "${LLAMACPP_DIR}/ggml-metal.metal"
|
||||
cat "${LLAMACPP_DIR}/ggml-common.h" | grep -v '#pragma once' >> "${LLAMACPP_DIR}/ggml-metal.metal"
|
||||
cat "${LLAMACPP_DIR}/ggml-metal.metal.temp" >> "${LLAMACPP_DIR}/ggml-metal.metal"
|
||||
rm "${LLAMACPP_DIR}/ggml-metal.metal.temp"
|
||||
}
|
||||
|
||||
cleanup_metal() {
|
||||
(cd ${LLAMACPP_DIR} && git checkout ggml-metal.metal)
|
||||
}
|
||||
|
||||
COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin"
|
||||
|
||||
case "${GOARCH}" in
|
||||
@@ -76,11 +63,9 @@ case "${GOARCH}" in
|
||||
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_METAL_EMBED_LIBRARY=on -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/metal"
|
||||
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
|
||||
bundle_metal
|
||||
build
|
||||
sign ${LLAMACPP_DIR}/build/darwin/${ARCH}/metal/lib/libext_server.dylib
|
||||
compress_libs
|
||||
cleanup_metal
|
||||
;;
|
||||
*)
|
||||
echo "GOARCH must be set"
|
||||
|
||||
@@ -185,7 +185,7 @@ if [ -d "${ROCM_PATH}" ]; then
|
||||
init_vars
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
|
||||
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
|
||||
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
|
||||
build
|
||||
|
||||
# Record the ROCM dependencies
|
||||
@@ -194,6 +194,12 @@ if [ -d "${ROCM_PATH}" ]; then
|
||||
for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
|
||||
echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
|
||||
done
|
||||
# bomb out if for some reason we didn't get a few deps
|
||||
if [ $(cat "${BUILD_DIR}/lib/deps.txt" | wc -l ) -lt 8 ] ; then
|
||||
cat "${BUILD_DIR}/lib/deps.txt"
|
||||
echo "ERROR: deps file short"
|
||||
exit 1
|
||||
fi
|
||||
compress_libs
|
||||
fi
|
||||
|
||||
|
||||
Submodule llm/llama.cpp updated: 77d1ac7e00...ceca1aef07
@@ -1,19 +1,21 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index f255ad76..914ecfdd 100644
|
||||
index 8fe5e0b1..3e82acb9 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -1101,12 +1101,13 @@ struct server_context {
|
||||
@@ -997,13 +997,15 @@ struct llama_server_context
|
||||
slot.n_sent_text += result.text_to_send.size();
|
||||
// add the token to slot queue and cache
|
||||
}
|
||||
|
||||
- slot.add_token_string(result);
|
||||
if (slot.params.stream) {
|
||||
+
|
||||
if (slot.params.stream)
|
||||
{
|
||||
send_partial_response(slot, result);
|
||||
}
|
||||
}
|
||||
|
||||
+ slot.add_token_string(result);
|
||||
+
|
||||
if (incomplete) {
|
||||
if (incomplete)
|
||||
{
|
||||
slot.has_next_token = true;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||
index b14cca61..02bfd4b1 100644
|
||||
index 8fe5e0b1..53bf39c1 100644
|
||||
--- a/examples/server/server.cpp
|
||||
+++ b/examples/server/server.cpp
|
||||
@@ -29,6 +29,10 @@
|
||||
@@ -31,6 +31,10 @@
|
||||
#include <atomic>
|
||||
#include <signal.h>
|
||||
#include <memory>
|
||||
|
||||
+#ifdef GGML_USE_CUBLAS
|
||||
+extern "C" GGML_CALL void ggml_free_cublas(void);
|
||||
@@ -12,8 +12,8 @@ index b14cca61..02bfd4b1 100644
|
||||
+
|
||||
using json = nlohmann::json;
|
||||
|
||||
bool server_verbose = false;
|
||||
@@ -664,6 +668,10 @@ struct server_context {
|
||||
struct server_params {
|
||||
@@ -363,6 +367,10 @@ struct llama_server_context
|
||||
llama_free_model(model);
|
||||
model = nullptr;
|
||||
}
|
||||
@@ -23,8 +23,8 @@ index b14cca61..02bfd4b1 100644
|
||||
+#endif
|
||||
}
|
||||
|
||||
bool load_model(const gpt_params & params_) {
|
||||
@@ -3499,6 +3507,7 @@ int main(int argc, char ** argv) {
|
||||
bool load_model(const gpt_params ¶ms_)
|
||||
@@ -3543,6 +3551,7 @@ int main(int argc, char **argv)
|
||||
sigemptyset (&sigint_action.sa_mask);
|
||||
sigint_action.sa_flags = 0;
|
||||
sigaction(SIGINT, &sigint_action, NULL);
|
||||
@@ -33,10 +33,10 @@ index b14cca61..02bfd4b1 100644
|
||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
|
||||
index c207ff87..945708a4 100644
|
||||
index 72bcec8c..6c934e8c 100644
|
||||
--- a/ggml-cuda.cu
|
||||
+++ b/ggml-cuda.cu
|
||||
@@ -46,6 +46,7 @@
|
||||
@@ -43,6 +43,7 @@
|
||||
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
|
||||
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
|
||||
#define cublasCreate hipblasCreate
|
||||
@@ -44,7 +44,7 @@ index c207ff87..945708a4 100644
|
||||
#define cublasGemmEx hipblasGemmEx
|
||||
#define cublasGemmBatchedEx hipblasGemmBatchedEx
|
||||
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
|
||||
@@ -8014,10 +8015,10 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
@@ -8751,10 +8752,10 @@ GGML_CALL bool ggml_cublas_loaded(void) {
|
||||
return g_cublas_loaded;
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ index c207ff87..945708a4 100644
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
||||
@@ -8027,7 +8028,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8764,7 +8765,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
#endif
|
||||
|
||||
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
|
||||
@@ -67,7 +67,7 @@ index c207ff87..945708a4 100644
|
||||
g_cublas_loaded = false;
|
||||
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
|
||||
return;
|
||||
@@ -8098,7 +8099,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
@@ -8835,7 +8836,7 @@ GGML_CALL void ggml_init_cublas() {
|
||||
// configure logging to stdout
|
||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||
|
||||
@@ -76,7 +76,7 @@ index c207ff87..945708a4 100644
|
||||
g_cublas_loaded = true;
|
||||
}
|
||||
}
|
||||
@@ -11753,3 +11754,23 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
@@ -12490,3 +12491,23 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
|
||||
}
|
||||
return device_count;
|
||||
}
|
||||
@@ -100,6 +100,7 @@ index c207ff87..945708a4 100644
|
||||
+
|
||||
+ g_cublas_initialized = false;
|
||||
+}
|
||||
\ No newline at end of file
|
||||
diff --git a/ggml-cuda.h b/ggml-cuda.h
|
||||
index b1ebd61d..6dd58ddf 100644
|
||||
--- a/ggml-cuda.h
|
||||
|
||||
44
llm/patches/03-load_exception.diff
Normal file
44
llm/patches/03-load_exception.diff
Normal file
@@ -0,0 +1,44 @@
|
||||
diff --git a/llama.cpp b/llama.cpp
|
||||
index 4225f955..7b762f86 100644
|
||||
--- a/llama.cpp
|
||||
+++ b/llama.cpp
|
||||
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
|
||||
}
|
||||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
|
||||
- return -1;
|
||||
+ throw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
|
||||
};
|
||||
}
|
||||
|
||||
- int status = llama_model_load(path_model, *model, params);
|
||||
- GGML_ASSERT(status <= 0);
|
||||
- if (status < 0) {
|
||||
- if (status == -1) {
|
||||
- LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
|
||||
- } else if (status == -2) {
|
||||
- LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
|
||||
+ try {
|
||||
+ int status = llama_model_load(path_model, *model, params);
|
||||
+ GGML_ASSERT(status <= 0);
|
||||
+ if (status < 0) {
|
||||
+ if (status == -1) {
|
||||
+ LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
|
||||
+ } else if (status == -2) {
|
||||
+ LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
|
||||
+ }
|
||||
+ delete model;
|
||||
+ return nullptr;
|
||||
}
|
||||
+ } catch (...) {
|
||||
+ LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
|
||||
delete model;
|
||||
- return nullptr;
|
||||
+ throw;
|
||||
}
|
||||
|
||||
return model;
|
||||
@@ -1,10 +1,10 @@
|
||||
diff --git a/llama.cpp b/llama.cpp
|
||||
index b19616e8..519b9602 100644
|
||||
index b27aa272..99372f9c 100644
|
||||
--- a/llama.cpp
|
||||
+++ b/llama.cpp
|
||||
@@ -9938,7 +9938,7 @@ struct llm_tokenizer_wpm {
|
||||
@@ -9360,7 +9360,7 @@ struct llm_tokenizer_wpm {
|
||||
}
|
||||
|
||||
|
||||
uint32_t to_lower(uint32_t code) {
|
||||
- static const std::locale locale("en_US.UTF-8");
|
||||
+ static const std::locale locale("");
|
||||
@@ -10,8 +10,8 @@ mkdir -p dist
|
||||
for TARGETARCH in arm64 amd64; do
|
||||
rm -rf llm/llama.cpp/build
|
||||
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
|
||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
|
||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -cover -o dist/ollama-darwin-$TARGETARCH-cov
|
||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
|
||||
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
|
||||
done
|
||||
|
||||
lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
|
||||
|
||||
@@ -53,7 +53,7 @@ function buildOllama() {
|
||||
write-host "Building ollama CLI"
|
||||
& go generate ./...
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& go build -ldflags "-s -w -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
& go build -trimpath -ldflags "-s -w -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
@@ -68,7 +68,7 @@ function buildApp() {
|
||||
write-host "Building Ollama App"
|
||||
cd "${script:SRC_DIR}\app"
|
||||
& windres -l 0 -o ollama.syso ollama.rc
|
||||
& go build -ldflags "-s -w -H windowsgui -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
& go build -trimpath -ldflags "-s -w -H windowsgui -X=github.com/jmorganca/ollama/version.Version=$script:VERSION -X=github.com/jmorganca/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
if ("${env:KEY_CONTAINER}") {
|
||||
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
|
||||
|
||||
@@ -795,10 +795,8 @@ func PruneLayers() error {
|
||||
|
||||
for _, blob := range blobs {
|
||||
name := blob.Name()
|
||||
if runtime.GOOS == "windows" {
|
||||
name = strings.ReplaceAll(name, "-", ":")
|
||||
}
|
||||
if strings.HasPrefix(name, "sha256:") {
|
||||
name = strings.ReplaceAll(name, "-", ":")
|
||||
if strings.HasPrefix(name, "sha256-") {
|
||||
deleteMap[name] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/exp/slices"
|
||||
@@ -47,10 +46,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
delimiter := ":"
|
||||
if runtime.GOOS == "windows" {
|
||||
delimiter = "-"
|
||||
}
|
||||
const delimiter = "-"
|
||||
|
||||
pattern := strings.Join([]string{"sha256", "*-partial"}, delimiter)
|
||||
temp, err := os.CreateTemp(blobs, pattern)
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -150,10 +149,7 @@ func GetBlobsPath(digest string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if runtime.GOOS == "windows" {
|
||||
digest = strings.ReplaceAll(digest, ":", "-")
|
||||
}
|
||||
|
||||
digest = strings.ReplaceAll(digest, ":", "-")
|
||||
path := filepath.Join(dir, "blobs", digest)
|
||||
dirPath := filepath.Dir(path)
|
||||
if digest == "" {
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"math"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
@@ -16,6 +17,7 @@ import (
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
@@ -207,7 +209,7 @@ func GenerateHandler(c *gin.Context) {
|
||||
|
||||
var sessionDuration time.Duration
|
||||
if req.KeepAlive == nil {
|
||||
sessionDuration = defaultSessionDuration
|
||||
sessionDuration = getDefaultSessionDuration()
|
||||
} else {
|
||||
sessionDuration = req.KeepAlive.Duration
|
||||
}
|
||||
@@ -384,6 +386,32 @@ func GenerateHandler(c *gin.Context) {
|
||||
streamResponse(c, ch)
|
||||
}
|
||||
|
||||
func getDefaultSessionDuration() time.Duration {
|
||||
if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
|
||||
v, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
d, err := time.ParseDuration(t)
|
||||
if err != nil {
|
||||
return defaultSessionDuration
|
||||
}
|
||||
|
||||
if d < 0 {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
|
||||
d := time.Duration(v) * time.Second
|
||||
if d < 0 {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
return defaultSessionDuration
|
||||
}
|
||||
|
||||
func EmbeddingsHandler(c *gin.Context) {
|
||||
loaded.mu.Lock()
|
||||
defer loaded.mu.Unlock()
|
||||
@@ -427,7 +455,7 @@ func EmbeddingsHandler(c *gin.Context) {
|
||||
|
||||
var sessionDuration time.Duration
|
||||
if req.KeepAlive == nil {
|
||||
sessionDuration = defaultSessionDuration
|
||||
sessionDuration = getDefaultSessionDuration()
|
||||
} else {
|
||||
sessionDuration = req.KeepAlive.Duration
|
||||
}
|
||||
@@ -1228,7 +1256,7 @@ func ChatHandler(c *gin.Context) {
|
||||
|
||||
var sessionDuration time.Duration
|
||||
if req.KeepAlive == nil {
|
||||
sessionDuration = defaultSessionDuration
|
||||
sessionDuration = getDefaultSessionDuration()
|
||||
} else {
|
||||
sessionDuration = req.KeepAlive.Duration
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user