fix(xsysinfo): make reported system RAM total cgroup/lxcfs-aware (#8059)

GetSystemRAMInfo derived Total from memory.TotalMemory(), which on Linux
uses syscall.Sysinfo().Totalram - the HOST kernel total. lxcfs/LXD does
NOT virtualize that value, while MemAvailable (used for Free/Available)
IS virtualized. Inside an LXD/container with a 128Gi host but a ~10Gi
container view this produced Total=128Gi, Available=10Gi => Used=118Gi,
reporting ~92% RAM usage on an idle container.

Derive Total instead from the minimum of all non-zero, non-unlimited
candidates: cgroup v2 memory.max, cgroup v1 memory.limit_in_bytes (the
kernel unlimited sentinel is ignored), /proc/meminfo MemTotal (which
lxcfs virtualizes), and the syscall.Sysinfo total as the bare-metal
fallback. On bare metal every candidate is unlimited or equals the host
total, so behavior is unchanged.

The selection/parsing lives in a pure function chooseTotalMemory(...)
taking file CONTENTS, unit-tested without a real LXD host; OS file
reads stay in a thin wrapper.

Assisted-by: claude:claude-opus-4-8 [Claude Code]
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-06-12 22:05:54 +00:00
parent 51f4f67c47
commit 12f4c3a002
3 changed files with 242 additions and 5 deletions

View File

@@ -1,9 +1,19 @@
package xsysinfo
import (
"os"
"github.com/mudler/memory"
)
// cgroup/proc paths used to make the reported RAM total container-aware.
// They are variables (not consts) so tests could override them if needed.
//
// Each path is read best-effort by systemTotalMemory; a file that cannot be
// read simply contributes no candidate to the total-memory selection.
var (
// cgroupV2MaxPath is the cgroup v2 memory limit file (memory.max).
cgroupV2MaxPath = "/sys/fs/cgroup/memory.max"
// cgroupV1LimitPath is the cgroup v1 memory limit file
// (memory.limit_in_bytes).
cgroupV1LimitPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
// procMemInfoPath is the meminfo file from which MemTotal is parsed.
procMemInfoPath = "/proc/meminfo"
)
// SystemRAMInfo contains system RAM usage information
type SystemRAMInfo struct {
Total uint64 `json:"total"`
@@ -13,12 +23,45 @@ type SystemRAMInfo struct {
UsagePercent float64 `json:"usage_percent"`
}
// readFileBestEffort returns the full contents of the file at path as a
// string. Every read failure is collapsed into the empty string: the cgroup
// and proc files this is used for are routinely absent (for example on
// non-Linux hosts), and callers treat "" as "no information".
func readFileBestEffort(path string) string {
	if data, err := os.ReadFile(path); err == nil {
		return string(data)
	}
	return ""
}
// systemTotalMemory returns the container-aware total system RAM in bytes.
//
// memory.TotalMemory() reports the HOST kernel total (syscall.Sysinfo on
// Linux), which lxcfs/LXD does NOT virtualize. Inside a container that
// over-reports physical RAM and, combined with the virtualized MemAvailable,
// inflates the reported usage (see issue #8059). We instead derive the total
// from the minimum of all available container-aware candidates.
func systemTotalMemory() uint64 {
return chooseTotalMemory(
readFileBestEffort(cgroupV2MaxPath),
readFileBestEffort(cgroupV1LimitPath),
readFileBestEffort(procMemInfoPath),
memory.TotalMemory(),
)
}
// GetSystemRAMInfo returns real-time system RAM usage
func GetSystemRAMInfo() (*SystemRAMInfo, error) {
total := memory.TotalMemory()
free := memory.AvailableMemory()
total := systemTotalMemory()
available := memory.AvailableMemory()
used := total - free
// AvailableMemory (MemAvailable) is virtualized by lxcfs, so in edge
// cases it can exceed our corrected total; clamp to avoid an unsigned
// underflow when computing Used.
if available > total {
available = total
}
used := total - available
usagePercent := 0.0
if total > 0 {
@@ -27,8 +70,8 @@ func GetSystemRAMInfo() (*SystemRAMInfo, error) {
return &SystemRAMInfo{
Total: total,
Used: used,
Free: free,
Available: total - used,
Free: available,
Available: available,
UsagePercent: usagePercent,
}, nil
}

View File

@@ -0,0 +1,120 @@
package xsysinfo
import (
"strconv"
"strings"
)
// cgroupV1UnlimitedSentinel is the value the kernel writes to
// memory.limit_in_bytes when no limit is set. It is PAGE_COUNTER_MAX
// (LONG_MAX rounded down to a page boundary), i.e. 0x7FFFFFFFFFFFF000 on
// 4 KiB-page systems. Any value at or above this is treated as "no limit".
//
// NOTE(review): on kernels built with larger pages the rounded-down value is
// presumably smaller than this constant (e.g. 0x7FFFFFFFFFFF0000 with 64 KiB
// pages) and would not be caught by the >= comparison in parseCgroupV1Limit.
// chooseTotalMemory takes the minimum of all candidates, so such a huge
// value should still lose to any real total - confirm on such a platform.
const cgroupV1UnlimitedSentinel = uint64(0x7FFFFFFFFFFFF000)
// parseUintField reads a base-10 unsigned 64-bit integer from raw file
// contents, ignoring surrounding whitespace (such as the trailing newline
// of a cgroup file).
//
// The boolean result is true only when a number was parsed; blank,
// non-numeric, negative or out-of-range content yields (0, false).
func parseUintField(raw string) (uint64, bool) {
	trimmed := strings.TrimSpace(raw)
	if len(trimmed) == 0 {
		return 0, false
	}
	if n, err := strconv.ParseUint(trimmed, 10, 64); err == nil {
		return n, true
	}
	return 0, false
}
// parseCgroupV2Max converts the contents of a cgroup v2 memory.max file
// into a byte limit. It returns 0 - meaning "no usable limit" - when the
// file holds the literal "max" (unlimited) or anything that is not a
// base-10 unsigned integer, including empty content.
func parseCgroupV2Max(raw string) uint64 {
	if strings.TrimSpace(raw) != "max" {
		if limit, ok := parseUintField(raw); ok {
			return limit
		}
	}
	return 0
}
// parseCgroupV1Limit converts the contents of a cgroup v1
// memory.limit_in_bytes file into a byte limit. It returns 0 - meaning "no
// usable limit" - when the content is not a number or when the value is at
// or above cgroupV1UnlimitedSentinel, the kernel's marker for "unlimited".
func parseCgroupV1Limit(raw string) uint64 {
	limit, ok := parseUintField(raw)
	if ok && limit < cgroupV1UnlimitedSentinel {
		return limit
	}
	return 0
}
// parseMemTotal extracts the MemTotal value, in bytes, from raw
// /proc/meminfo contents.
//
// Only the first line starting with "MemTotal:" is considered. Its second
// whitespace-separated field is the value and the optional third field is
// the unit, matched case-insensitively: "kB" (the unit /proc/meminfo
// normally uses) scales by 1024, "mB" by 1024^2 and "gB" by 1024^3. A
// missing or unrecognized unit leaves the value unscaled.
//
// It returns 0 when the field is missing, the value is not a base-10
// unsigned integer, or the scaled value does not fit in a uint64. The last
// case matters because a silently wrapped product would be a small bogus
// number, and callers that pick the smallest candidate would then prefer it
// over every real total.
func parseMemTotal(raw string) uint64 {
	for _, line := range strings.Split(raw, "\n") {
		if !strings.HasPrefix(line, "MemTotal:") {
			continue
		}
		fields := strings.Fields(line)
		// Expected: ["MemTotal:", "<value>", "kB"]
		if len(fields) < 2 {
			return 0
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return 0
		}

		multiplier := uint64(1)
		if len(fields) >= 3 {
			switch strings.ToLower(fields[2]) {
			case "kb":
				multiplier = 1 << 10
			case "mb":
				multiplier = 1 << 20
			case "gb":
				multiplier = 1 << 30
			}
		}

		// Reject values whose byte count would overflow uint64 instead of
		// returning the wrapped product.
		if v > ^uint64(0)/multiplier {
			return 0
		}
		return v * multiplier
	}
	return 0
}
// chooseTotalMemory picks the system RAM total, in bytes, from the raw
// contents of the cgroup/proc files and the sysinfo total.
//
// On Linux the host kernel total (sysinfoTotal, from syscall.Sysinfo) is NOT
// virtualized by lxcfs/LXD, so inside a container it over-reports physical
// RAM, whereas the cgroup limits and /proc/meminfo MemTotal reflect the
// container's view. The result is therefore the SMALLEST candidate that is
// non-zero, where each source reports 0 for "unknown or unlimited":
//
//   - cgroupV2Max:   contents of cgroup v2 memory.max ("max" => skipped)
//   - cgroupV1Limit: contents of cgroup v1 memory.limit_in_bytes
//     (kernel unlimited sentinel => skipped)
//   - procMemInfo:   contents of /proc/meminfo (MemTotal, which lxcfs/LXD
//     virtualizes)
//   - sysinfoTotal:  bare-metal fallback
//
// On bare metal the cgroup limits are unlimited and MemTotal == sysinfoTotal,
// so the host total is returned unchanged. If every candidate is 0 the
// result is 0.
func chooseTotalMemory(cgroupV2Max, cgroupV1Limit, procMemInfo string, sysinfoTotal uint64) uint64 {
	var smallest uint64

	// consider folds one candidate into the running minimum; a zero
	// candidate carries no information and is skipped.
	consider := func(candidate uint64) {
		switch {
		case candidate == 0:
			// Unknown or unlimited: ignore.
		case smallest == 0, candidate < smallest:
			smallest = candidate
		}
	}

	consider(parseCgroupV2Max(cgroupV2Max))
	consider(parseCgroupV1Limit(cgroupV1Limit))
	consider(parseMemTotal(procMemInfo))
	consider(sysinfoTotal)

	return smallest
}

View File

@@ -0,0 +1,74 @@
package xsysinfo
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for chooseTotalMemory. Every case feeds the function raw file
// CONTENTS (as the OS wrapper would read them) plus a host sysinfo total, so
// no real cgroup or lxcfs environment is required. An empty string models a
// file that does not exist or could not be read.
var _ = Describe("chooseTotalMemory", func() {
	const (
		gi128 = uint64(128) * 1024 * 1024 * 1024
		gi20  = uint64(20) * 1024 * 1024 * 1024
		gi10  = uint64(10) * 1024 * 1024 * 1024
	)

	// /proc/meminfo MemTotal is in kB; build a snippet for a given byte total.
	memInfo := func(bytes uint64) string {
		kb := bytes / 1024
		return "MemTotal: " + itoa(kb) + " kB\nMemFree: 123 kB\n"
	}

	Context("bare metal (no cgroup cap, memory.max == max)", func() {
		It("uses the host sysinfo total", func() {
			// MemTotal mirrors sysinfo on bare metal. The cgroup v1 file is
			// absent, which is modeled as empty content like in the other
			// cases (previously a NUL-byte string that was only ignored
			// because it failed to parse).
			got := chooseTotalMemory("max\n", "", memInfo(gi128), gi128)
			Expect(got).To(Equal(gi128))
		})
	})

	Context("LXD/lxcfs container (MemTotal virtualized below host, no cap)", func() {
		It("uses the virtualized MemTotal, not the host sysinfo total", func() {
			// This is issue #8059: host sysinfo says 128Gi, but lxcfs
			// virtualizes /proc/meminfo MemTotal to 20Gi and there is no
			// cgroup cap. The corrected total must be 20Gi.
			got := chooseTotalMemory("max\n", "", memInfo(gi20), gi128)
			Expect(got).To(Equal(gi20))
		})
	})

	Context("cgroup v2 cap set below MemTotal", func() {
		It("uses the cgroup cap", func() {
			got := chooseTotalMemory(itoa(gi10)+"\n", "", memInfo(gi20), gi128)
			Expect(got).To(Equal(gi10))
		})
	})

	Context("cgroup v1 with the kernel unlimited sentinel", func() {
		It("ignores the sentinel and falls back to MemTotal", func() {
			// 9223372036854771712 == 0x7FFFFFFFFFFFF000.
			got := chooseTotalMemory("", "9223372036854771712\n", memInfo(gi20), gi128)
			Expect(got).To(Equal(gi20))
		})
	})

	Context("all candidates empty/unlimited", func() {
		It("falls back to sysinfo total", func() {
			got := chooseTotalMemory("max\n", "", "", gi128)
			Expect(got).To(Equal(gi128))
		})
	})
})
// itoa formats v as a base-10 string. It exists only so the test file does
// not need to import strconv.
func itoa(v uint64) string {
	// 20 bytes hold the longest uint64 (18446744073709551615).
	var digits [20]byte
	pos := len(digits) - 1
	// Fill from the least significant digit backwards; the loop always
	// writes at least one digit, which covers v == 0.
	for {
		digits[pos] = byte('0' + v%10)
		v /= 10
		if v == 0 {
			break
		}
		pos--
	}
	return string(digits[pos:])
}