From f0e001b7f83fd46af6227a46f6089eebc318da5b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 13 Jun 2026 18:13:06 +0200 Subject: [PATCH] fix(xsysinfo): container-aware total RAM detection (cgroup/lxcfs) (#8059) (#10288) fix(xsysinfo): make reported system RAM total cgroup/lxcfs-aware (#8059) GetSystemRAMInfo derived Total from memory.TotalMemory(), which on Linux uses syscall.Sysinfo().Totalram - the HOST kernel total. lxcfs/LXD does NOT virtualize that value, while MemAvailable (used for Free/Available) IS virtualized. Inside an LXD/container with a 128Gi host but a ~10Gi container view this produced Total=128Gi, Available=10Gi => Used=118Gi, reporting ~92% RAM usage on an idle container. Derive Total instead from the minimum of all non-zero, non-unlimited candidates: cgroup v2 memory.max, cgroup v1 memory.limit_in_bytes (the kernel unlimited sentinel is ignored), /proc/meminfo MemTotal (which lxcfs virtualizes), and the syscall.Sysinfo total as the bare-metal fallback. On bare metal every candidate is unlimited or equals the host total, so behavior is unchanged. The selection/parsing lives in a pure function chooseTotalMemory(...) taking file CONTENTS, unit-tested without a real LXD host; OS file reads stay in a thin wrapper. 
Assisted-by: claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- pkg/xsysinfo/memory.go | 53 +++++++++++-- pkg/xsysinfo/memory_total.go | 120 ++++++++++++++++++++++++++++++ pkg/xsysinfo/memory_total_test.go | 74 ++++++++++++++++++ 3 files changed, 242 insertions(+), 5 deletions(-) create mode 100644 pkg/xsysinfo/memory_total.go create mode 100644 pkg/xsysinfo/memory_total_test.go diff --git a/pkg/xsysinfo/memory.go b/pkg/xsysinfo/memory.go index ae40f1920..91d642edd 100644 --- a/pkg/xsysinfo/memory.go +++ b/pkg/xsysinfo/memory.go @@ -1,9 +1,19 @@ package xsysinfo import ( + "os" + "github.com/mudler/memory" ) +// cgroup/proc paths used to make the reported RAM total container-aware. +// They are variables (not consts) so tests could override them if needed. +var ( + cgroupV2MaxPath = "/sys/fs/cgroup/memory.max" + cgroupV1LimitPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes" + procMemInfoPath = "/proc/meminfo" +) + // SystemRAMInfo contains system RAM usage information type SystemRAMInfo struct { Total uint64 `json:"total"` @@ -13,12 +23,45 @@ type SystemRAMInfo struct { UsagePercent float64 `json:"usage_percent"` } +// readFileBestEffort reads a file and returns its contents, or "" on any error. +// Missing cgroup/proc files (e.g. on non-Linux hosts) are expected and benign. +func readFileBestEffort(path string) string { + b, err := os.ReadFile(path) + if err != nil { + return "" + } + return string(b) +} + +// systemTotalMemory returns the container-aware total system RAM in bytes. +// +// memory.TotalMemory() reports the HOST kernel total (syscall.Sysinfo on +// Linux), which lxcfs/LXD does NOT virtualize. Inside a container that +// over-reports physical RAM and, combined with the virtualized MemAvailable, +// inflates the reported usage (see issue #8059). We instead derive the total +// from the minimum of all available container-aware candidates. 
+func systemTotalMemory() uint64 { + return chooseTotalMemory( + readFileBestEffort(cgroupV2MaxPath), + readFileBestEffort(cgroupV1LimitPath), + readFileBestEffort(procMemInfoPath), + memory.TotalMemory(), + ) +} + // GetSystemRAMInfo returns real-time system RAM usage func GetSystemRAMInfo() (*SystemRAMInfo, error) { - total := memory.TotalMemory() - free := memory.AvailableMemory() + total := systemTotalMemory() + available := memory.AvailableMemory() - used := total - free + // AvailableMemory (MemAvailable) is virtualized by lxcfs, so in edge + // cases it can exceed our corrected total; clamp to avoid an unsigned + // underflow when computing Used. + if available > total { + available = total + } + + used := total - available usagePercent := 0.0 if total > 0 { @@ -27,8 +70,8 @@ func GetSystemRAMInfo() (*SystemRAMInfo, error) { return &SystemRAMInfo{ Total: total, Used: used, - Free: free, - Available: total - used, + Free: available, + Available: available, UsagePercent: usagePercent, }, nil } diff --git a/pkg/xsysinfo/memory_total.go b/pkg/xsysinfo/memory_total.go new file mode 100644 index 000000000..1f4883952 --- /dev/null +++ b/pkg/xsysinfo/memory_total.go @@ -0,0 +1,120 @@ +package xsysinfo + +import ( + "strconv" + "strings" +) + +// cgroupV1UnlimitedSentinel is the value the kernel writes to +// memory.limit_in_bytes when no limit is set. It is PAGE_COUNTER_MAX +// (LONG_MAX rounded down to a page boundary), i.e. 0x7FFFFFFFFFFFF000 on +// 4 KiB-page systems. Any value at or above this is treated as "no limit". +const cgroupV1UnlimitedSentinel = uint64(0x7FFFFFFFFFFFF000) + +// parseUintField parses a trimmed unsigned integer from raw file contents. +// It returns (0, false) when the content is empty or not a number. 
+func parseUintField(raw string) (uint64, bool) { + s := strings.TrimSpace(raw) + if s == "" { + return 0, false + } + v, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return 0, false + } + return v, true +} + +// parseCgroupV2Max interprets the contents of cgroup v2 memory.max. +// The literal "max" means unlimited, returning 0. +func parseCgroupV2Max(raw string) uint64 { + if strings.TrimSpace(raw) == "max" { + return 0 + } + v, ok := parseUintField(raw) + if !ok { + return 0 + } + return v +} + +// parseCgroupV1Limit interprets the contents of cgroup v1 +// memory.limit_in_bytes. The kernel's "unlimited" sentinel (a value at or +// above PAGE_COUNTER_MAX) is treated as no limit, returning 0. +func parseCgroupV1Limit(raw string) uint64 { + v, ok := parseUintField(raw) + if !ok { + return 0 + } + if v >= cgroupV1UnlimitedSentinel { + return 0 + } + return v +} + +// parseMemTotal extracts the MemTotal value (in bytes) from raw +// /proc/meminfo contents, scaling by the kB/MB/GB unit suffix (a value with +// no recognized unit is returned as-is). Returns 0 if missing or malformed. +func parseMemTotal(raw string) uint64 { + for _, line := range strings.Split(raw, "\n") { + if !strings.HasPrefix(line, "MemTotal:") { + continue + } + fields := strings.Fields(line) + // Expected: ["MemTotal:", "<value>", "kB"] + if len(fields) < 2 { + return 0 + } + v, err := strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return 0 + } + if len(fields) >= 3 { + switch strings.ToLower(fields[2]) { + case "kb": + return v * 1024 + case "mb": + return v * 1024 * 1024 + case "gb": + return v * 1024 * 1024 * 1024 + } + } + return v + } + return 0 +} + +// chooseTotalMemory selects the most accurate system RAM total in bytes. +// +// On Linux the host kernel total (sysinfoTotal, from syscall.Sysinfo) is NOT +// virtualized by lxcfs/LXD, so inside a container it over-reports physical +// RAM. The cgroup limits and /proc/meminfo MemTotal, by contrast, do reflect +// the container's view. 
We therefore take the MINIMUM of all non-zero, +// non-unlimited candidates: +// +// - cgroup v2 memory.max ("max" => unlimited, skipped) +// - cgroup v1 memory.limit_in_bytes (kernel sentinel => unlimited, skipped) +// - /proc/meminfo MemTotal (lxcfs/LXD virtualizes this) +// - sysinfoTotal (bare-metal fallback) +// +// On bare metal the cgroup limits are unlimited and MemTotal == sysinfoTotal, +// so the result equals the host total exactly as before. +func chooseTotalMemory(cgroupV2Max, cgroupV1Limit, procMemInfo string, sysinfoTotal uint64) uint64 { + candidates := []uint64{ + parseCgroupV2Max(cgroupV2Max), + parseCgroupV1Limit(cgroupV1Limit), + parseMemTotal(procMemInfo), + sysinfoTotal, + } + + var best uint64 + for _, c := range candidates { + if c == 0 { + continue + } + if best == 0 || c < best { + best = c + } + } + return best +} diff --git a/pkg/xsysinfo/memory_total_test.go b/pkg/xsysinfo/memory_total_test.go new file mode 100644 index 000000000..4353eb109 --- /dev/null +++ b/pkg/xsysinfo/memory_total_test.go @@ -0,0 +1,74 @@ +package xsysinfo + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("chooseTotalMemory", func() { + const ( + gi128 = uint64(128) * 1024 * 1024 * 1024 + gi20 = uint64(20) * 1024 * 1024 * 1024 + gi10 = uint64(10) * 1024 * 1024 * 1024 + ) + + // /proc/meminfo MemTotal is in kB; build a snippet for a given byte total. + memInfo := func(bytes uint64) string { + kb := bytes / 1024 + return "MemTotal: " + itoa(kb) + " kB\nMemFree: 123 kB\n" + } + + Context("bare metal (no cgroup cap, memory.max == max)", func() { + It("uses the host sysinfo total", func() { + // MemTotal mirrors sysinfo on bare metal. 
+ got := chooseTotalMemory("max\n", string(rune(0)), memInfo(gi128), gi128) + Expect(got).To(Equal(gi128)) + }) + }) + + Context("LXD/lxcfs container (MemTotal virtualized below host, no cap)", func() { + It("uses the virtualized MemTotal, not the host sysinfo total", func() { + // This is issue #8059: host sysinfo says 128Gi, but lxcfs + // virtualizes /proc/meminfo MemTotal to 20Gi and there is no + // cgroup cap. The corrected total must be 20Gi. + got := chooseTotalMemory("max\n", "", memInfo(gi20), gi128) + Expect(got).To(Equal(gi20)) + }) + }) + + Context("cgroup v2 cap set below MemTotal", func() { + It("uses the cgroup cap", func() { + got := chooseTotalMemory(itoa(gi10)+"\n", "", memInfo(gi20), gi128) + Expect(got).To(Equal(gi10)) + }) + }) + + Context("cgroup v1 with the kernel unlimited sentinel", func() { + It("ignores the sentinel and falls back to MemTotal", func() { + got := chooseTotalMemory("", "9223372036854771712\n", memInfo(gi20), gi128) + Expect(got).To(Equal(gi20)) + }) + }) + + Context("all candidates empty/unlimited", func() { + It("falls back to sysinfo total", func() { + got := chooseTotalMemory("max\n", "", "", gi128) + Expect(got).To(Equal(gi128)) + }) + }) +}) + +// itoa is a tiny base-10 formatter to avoid importing strconv into the test. +func itoa(v uint64) string { + if v == 0 { + return "0" + } + var buf [20]byte + i := len(buf) + for v > 0 { + i-- + buf[i] = byte('0' + v%10) + v /= 10 + } + return string(buf[i:]) +}