try to produce more consistent mtimes

This is a bit of a shot in the dark, but I believe we may have
unnecessary delta in our images caused by rebuilding the software on
a daily basis. That would result in mtimes changing even when the file
contents actually do not.

A tiny mtimer tool is meant to work around that: it consumes an input
JSON file of mtimes+checksums and, if a file's mtime has changed, it
checksums the affected file to verify that its content has actually
changed as well. Assuming reproducible builds, this should result in far
less delta in the erofs and, by extension, the delta download.
This commit is contained in:
Harald Sitter
2025-08-25 18:06:33 +02:00
parent 947cb878bd
commit 9719237988
6 changed files with 203 additions and 0 deletions

View File

@@ -82,6 +82,18 @@ mkosi \
--output-directory=. \
"$@"
# Adjust mtimes to reduce unnecessary churn between images caused by us rebuilding repos that have possibly not changed in source or binary interfaces.
# This step only runs when the SSH key file is present, because the mtimer state is kept on the remote host.
if [ -f "$PWD/.secure_files/ssh.key" ]; then
# You can use `ssh-keyscan origin.files.kde.org` to get the host key
echo "origin.files.kde.org ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILUjdH4S7otYIdLUkOZK+owIiByjNQPzGi7GQ5HOWjO6" >> ~/.ssh/known_hosts
chmod 600 "$PWD/.secure_files/ssh.key"
# Fetch the recorded mtimes+checksums, let mtimer update them against $OUTPUT, then upload the updated record.
scp -i "$PWD/.secure_files/ssh.key" kdeos@origin.files.kde.org:/home/kdeos/mtimer.json mtimer.json
go -C ./mtimer/ run . -root "$OUTPUT" -json "mtimer.json"
scp -i "$PWD/.secure_files/ssh.key" mtimer.json kdeos@origin.files.kde.org:/home/kdeos/mtimer.json
fi
# NOTE: /efi must be empty so auto mounting can happen. As such we put our templates in a different directory
rm -rfv "${OUTPUT}/efi"
[ -d "${OUTPUT}/efi" ] || mkdir --mode 0700 "${OUTPUT}/efi"

5
mtimer/go.mod Normal file
View File

@@ -0,0 +1,5 @@
module invent.kde.org/kde-linux/mtimer.git
go 1.24.4
require golang.org/x/sync v0.16.0 // indirect

2
mtimer/go.mod.license Normal file
View File

@@ -0,0 +1,2 @@
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: none

2
mtimer/go.sum Normal file
View File

@@ -0,0 +1,2 @@
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=

2
mtimer/go.sum.license Normal file
View File

@@ -0,0 +1,2 @@
SPDX-License-Identifier: CC0-1.0
SPDX-FileCopyrightText: none

180
mtimer/main.go Normal file
View File

@@ -0,0 +1,180 @@
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
// SPDX-FileCopyrightText: 2025 Harald Sitter <sitter@kde.org>
package main
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"flag"
"io"
"log"
"os"
"path/filepath"
"runtime"
"time"
"golang.org/x/sync/errgroup"
)
// FileInfo is the recorded state of a single file: the hex-encoded SHA-256
// digest of its content and its modification time in Unix seconds.
type FileInfo struct {
SHA256 string `json:"sha256"`
MTime int64 `json:"mtime"`
}
// SHA256Sums is the top-level document stored in the JSON file. Files is
// keyed by the file's path relative to the rootfs being processed.
type SHA256Sums struct {
Files map[string]FileInfo `json:"files"`
}
// Analysis describes one file whose mtime differs from the recorded one and
// therefore needs its content checked. The same type is used for the result,
// in which case info is the entry to store in the new record.
type Analysis struct {
info FileInfo // recorded entry on input, entry to store on output
absPath string // path used to open the file
relPath string // path relative to the root; key into SHA256Sums.Files
}
// loadBlob reads the JSON document at jsonPath and decodes it into a
// SHA256Sums. Read and decode errors are handed back to the caller unchanged.
func loadBlob(jsonPath string) (*SHA256Sums, error) {
	raw, readErr := os.ReadFile(jsonPath)
	if readErr != nil {
		return nil, readErr
	}

	parsed := new(SHA256Sums)
	if decodeErr := json.Unmarshal(raw, parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return parsed, nil
}
// sha256SumFile streams the file at path through SHA-256 and returns the
// digest as a lowercase hex string. Any failure to open or read the file
// terminates the program via log.Fatal.
func sha256SumFile(path string) string {
	file, openErr := os.Open(path)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer file.Close()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		log.Fatal(copyErr)
	}

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}
// recordNewFile stores an entry for a file that has no previous record:
// the checksum of the file at absPath and the mtime (Unix seconds) reported
// by info, keyed by relPath in blob.Files.
func recordNewFile(blob *SHA256Sums, absPath string, relPath string, info os.FileInfo) {
	var entry FileInfo
	entry.SHA256 = sha256SumFile(absPath)
	entry.MTime = info.ModTime().Unix()
	blob.Files[relPath] = entry
}
// analyze checks a file whose mtime differs from the recorded one.
//
// input.info is the previously recorded entry for the file. The file at
// input.absPath is checksummed and compared against the recorded checksum:
//
//   - If the checksums differ, the content really changed. The returned
//     Analysis carries the new checksum together with the file's current
//     mtime, so the record matches what is on disk and later runs can restore
//     exactly this mtime for identical content.
//   - If the checksums match, only the mtime changed. The recorded mtime is
//     applied to the file (atime is set to the same value) and input is
//     returned unchanged.
//
// Failing to read or stat the file terminates the program via log.Fatal.
// Failing to restore the mtime is logged and otherwise tolerated; the record
// keeps the old mtime, so the next run will check the file again.
func analyze(input Analysis) Analysis {
	newSum := sha256SumFile(input.absPath)
	if input.info.SHA256 != newSum {
		// File is really different. Record the mtime the file has now rather
		// than the stale recorded one; otherwise the next run would "restore"
		// the old mtime onto the new content and cause one more mtime change.
		stat, err := os.Lstat(input.absPath)
		if err != nil {
			log.Fatal(err)
		}
		return Analysis{
			info: FileInfo{
				SHA256: newSum,
				MTime:  stat.ModTime().Unix(),
			},
			absPath: input.absPath,
			relPath: input.relPath,
		}
	}

	// The file has not actually changed. Apply the original mtime.
	log.Println("Restoring mtime for", input.relPath)
	recorded := time.Unix(input.info.MTime, 0)
	if err := os.Chtimes(input.absPath, recorded, recorded); err != nil {
		log.Printf("Failed to restore mtime for %s: %v", input.relPath, err)
	}
	return input
}
// main reconciles the mtimes of the regular files below -root with the record
// stored in the -json file, then rewrites that file with the updated record.
//
// For every regular file found while walking the root:
//   - no recorded entry: checksum it and record it with its current mtime;
//   - recorded mtime equals the current mtime (seconds precision): carry the
//     entry over unchanged;
//   - otherwise: queue it for analyze, which either restores the recorded
//     mtime (content unchanged) or produces an updated entry.
//
// Entries for files that no longer exist below the root are not carried over.
// Any error terminates the program via log.Fatal.
func main() {
	root := flag.String("root", "", "rootfs to operate on")
	jsonPath := flag.String("json", "", "json file to read and write")
	flag.Parse()

	if *root == "" {
		log.Fatal("no root path specified")
	}
	if *jsonPath == "" {
		log.Fatal("no json path specified")
	}

	blob, err := loadBlob(*jsonPath)
	if err != nil {
		log.Fatal(err)
	}

	newBlob := &SHA256Sums{
		Files: map[string]FileInfo{},
	}
	var toAnalyze []Analysis

	err = filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// Only regular files have content worth checksumming. Directories and
		// symlinks are skipped as before; FIFOs, sockets and device nodes are
		// skipped too, because opening and reading them can block or never end.
		if !info.Mode().IsRegular() {
			return nil
		}

		relPath, err := filepath.Rel(*root, path)
		if err != nil {
			return err
		}

		blobInfo, ok := blob.Files[relPath]
		if !ok {
			// New file, create new entry
			recordNewFile(newBlob, path, relPath, info)
			return nil
		}

		if blobInfo.MTime != info.ModTime().Unix() { // We only care about seconds precision, there is more than that between two builds anyway
			// Changed file, queue for analysis
			toAnalyze = append(toAnalyze, Analysis{
				info:    blobInfo,
				absPath: path,
				relPath: relPath,
			})
			return nil
		}

		// Unchanged, carry over old entry
		newBlob.Files[relPath] = blobInfo
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}

	// Checksumming is the expensive part, so run the queued analyses in
	// parallel, bounded by the number of CPUs. Each goroutine writes only to
	// its own slot in results, so no further synchronization is needed.
	ctx := context.Background()
	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(runtime.NumCPU())
	results := make([]Analysis, len(toAnalyze))
	for i, input := range toAnalyze {
		g.Go(func() error {
			results[i] = analyze(input)
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		log.Fatal(err)
	}

	// Merge the analysis results into the new record on the main goroutine;
	// the map must not be written concurrently.
	for _, result := range results {
		newBlob.Files[result.relPath] = result.info
	}

	data, err := json.MarshalIndent(newBlob, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile(*jsonPath, data, 0644); err != nil {
		log.Fatal(err)
	}
}