From e63a0e8fd774fcb2138ea4cc0ca697f6f919ce7f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 20 May 2026 22:16:54 +0000 Subject: [PATCH] feat(usage): backfill Source on pre-feature usage rows InitDB now classifies any pre-existing usage_record with an empty source: 'legacy-api-key' user -> legacy, everything else -> web. The backfill is idempotent (only touches NULL/empty rows). Refs: #9862 Signed-off-by: Ettore Di Giacinto --- core/http/auth/db.go | 8 ++++++- core/http/auth/usage.go | 22 +++++++++++++++++++ core/http/auth/usage_test.go | 42 ++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/core/http/auth/db.go b/core/http/auth/db.go index d860e5068..5d94557ea 100644 --- a/core/http/auth/db.go +++ b/core/http/auth/db.go @@ -38,9 +38,15 @@ func InitDB(databaseURL string) (*gorm.DB, error) { } // Backfill: users created before the provider column existed have an empty - // provider — treat them as local accounts so the UI can identify them. + // provider - treat them as local accounts so the UI can identify them. db.Exec("UPDATE users SET provider = ? WHERE provider = '' OR provider IS NULL", ProviderLocal) + // Backfill: pre-feature usage_records rows have an empty (NULL or '') source. Classify them so the + // new per-source aggregators include them. 
+ if err := BackfillUsageSource(db); err != nil { + return nil, fmt.Errorf("failed to backfill usage source: %w", err) + } + // Create composite index on users(provider, subject) for fast OAuth lookups if err := db.Exec("CREATE INDEX IF NOT EXISTS idx_users_provider_subject ON users(provider, subject)").Error; err != nil { // Ignore error on postgres if index already exists diff --git a/core/http/auth/usage.go b/core/http/auth/usage.go index ad56ee309..99cecef77 100644 --- a/core/http/auth/usage.go +++ b/core/http/auth/usage.go @@ -138,6 +138,28 @@ func GetUserUsage(db *gorm.DB, userID, period string) ([]UsageBucket, error) { return buckets, nil } +// BackfillUsageSource classifies usage_records rows whose source column is still unset. +// Only rows where source is NULL or '' are updated, so re-running leaves classified rows alone; the first failing UPDATE is returned as a wrapped error. +// - rows whose user_id == "legacy-api-key" -> UsageSourceLegacy +// - every other unset row -> UsageSourceWeb +func BackfillUsageSource(db *gorm.DB) error { + // Legacy pass must run first: the catch-all UPDATE below has no user_id filter and would mark these rows as web. + if err := db.Exec( + `UPDATE usage_records SET source = ? WHERE (source IS NULL OR source = '') AND user_id = ?`, + UsageSourceLegacy, "legacy-api-key", + ).Error; err != nil { + return fmt.Errorf("backfill legacy usage source: %w", err) + } + // Catch-all pass: whatever is still unset after the legacy pass -> web + if err := db.Exec( + `UPDATE usage_records SET source = ? WHERE (source IS NULL OR source = '')`, + UsageSourceWeb, + ).Error; err != nil { + return fmt.Errorf("backfill web usage source: %w", err) + } + return nil +} + // GetAllUsage returns aggregated usage for all users (admin). Optional userID filter. 
func GetAllUsage(db *gorm.DB, period, userID string) ([]UsageBucket, error) { sqlite := isSQLiteDB(db) diff --git a/core/http/auth/usage_test.go b/core/http/auth/usage_test.go index c3ad1e43f..41ff1bd65 100644 --- a/core/http/auth/usage_test.go +++ b/core/http/auth/usage_test.go @@ -159,6 +159,70 @@ var _ = Describe("Usage", func() { }) }) + Describe("Usage source backfill", func() { + // Specs seed usage_records with raw SQL and an explicit source value; source = '' stands in for a row written before the feature. + It("backfills 'web' for pre-feature rows", func() { + db := testDB() + + rawDB, err := db.DB() + Expect(err).ToNot(HaveOccurred()) + _, err = rawDB.Exec( + `INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, '', ?, ?, 0, 0, 0, 0)`, + "user-x", "gpt-4", time.Now()) + Expect(err).ToNot(HaveOccurred()) + + Expect(auth.BackfillUsageSource(db)).To(Succeed()) + + var loaded auth.UsageRecord + Expect(db.Where("user_id = ?", "user-x").First(&loaded).Error).To(Succeed()) + Expect(loaded.Source).To(Equal(auth.UsageSourceWeb)) + }) + + It("backfills 'legacy' for pre-feature rows with legacy-api-key user_id", func() { + db := testDB() + + rawDB, err := db.DB() + Expect(err).ToNot(HaveOccurred()) + _, err = rawDB.Exec( + `INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, '', ?, ?, 0, 0, 0, 0)`, + "legacy-api-key", "gpt-4", time.Now()) + Expect(err).ToNot(HaveOccurred()) + + Expect(auth.BackfillUsageSource(db)).To(Succeed()) + + var loaded auth.UsageRecord + Expect(db.Where("user_id = ?", "legacy-api-key").First(&loaded).Error).To(Succeed()) + Expect(loaded.Source).To(Equal(auth.UsageSourceLegacy)) + }) + + It("is idempotent on re-run", func() { + db := testDB() + + rawDB, err := db.DB() + Expect(err).ToNot(HaveOccurred()) + // One unset row, plus one row that already carries a source the backfill would not have chosen for it. + _, err = rawDB.Exec( + `INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, '', ?, ?, 0, 0, 0, 0)`, + "user-pending", "gpt-4", time.Now()) + Expect(err).ToNot(HaveOccurred()) + _, err = rawDB.Exec( + `INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, ?, ?, ?, 0, 0, 0, 0)`, + "user-classified", auth.UsageSourceLegacy, "gpt-4", time.Now()) + Expect(err).ToNot(HaveOccurred()) + + // The second pass must observe exactly what the first pass produced: nothing is reclassified or overwritten. + for i := 0; i < 2; i++ { + Expect(auth.BackfillUsageSource(db)).To(Succeed()) + + var pending, classified auth.UsageRecord + Expect(db.Where("user_id = ?", "user-pending").First(&pending).Error).To(Succeed()) + Expect(pending.Source).To(Equal(auth.UsageSourceWeb)) + Expect(db.Where("user_id = ?", "user-classified").First(&classified).Error).To(Succeed()) + Expect(classified.Source).To(Equal(auth.UsageSourceLegacy)) + } + }) + }) + + Describe("UsageRecord with source fields", func() { It("persists Source, APIKeyID, APIKeyName", func() { db := testDB()