feat(usage): backfill Source on pre-feature usage rows

InitDB now classifies every pre-existing usage_records row whose source
is NULL or empty: rows belonging to the 'legacy-api-key' user become
'legacy', and all other rows become 'web'. The backfill is idempotent
because it only touches rows whose source is still NULL or empty.

Refs: #9862
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-05-20 22:16:54 +00:00
parent 06db295d9e
commit e63a0e8fd7
3 changed files with 71 additions and 1 deletions

View File

@@ -38,9 +38,15 @@ func InitDB(databaseURL string) (*gorm.DB, error) {
}
// Backfill: users created before the provider column existed have an empty
// provider treat them as local accounts so the UI can identify them.
// provider - treat them as local accounts so the UI can identify them.
db.Exec("UPDATE users SET provider = ? WHERE provider = '' OR provider IS NULL", ProviderLocal)
// Backfill: pre-feature usage_records have no source column. Classify them so the
// new per-source aggregators include them.
if err := BackfillUsageSource(db); err != nil {
return nil, fmt.Errorf("failed to backfill usage source: %w", err)
}
// Create composite index on users(provider, subject) for fast OAuth lookups
if err := db.Exec("CREATE INDEX IF NOT EXISTS idx_users_provider_subject ON users(provider, subject)").Error; err != nil {
// Ignore error on postgres if index already exists

View File

@@ -138,6 +138,28 @@ func GetUserUsage(db *gorm.DB, userID, period string) ([]UsageBucket, error) {
return buckets, nil
}
// BackfillUsageSource classifies usage_records rows that have no source yet.
//
// It issues two UPDATE statements, both restricted to rows whose source is
// NULL or the empty string:
//  1. rows with user_id "legacy-api-key" are set to UsageSourceLegacy;
//  2. every row still unclassified is set to UsageSourceWeb.
//
// Rows that already carry a non-empty source are never matched, so running the
// function again after a successful pass updates nothing (assuming both source
// constants are non-empty).
//
// It returns a wrapped error naming the step that failed, or nil on success.
func BackfillUsageSource(db *gorm.DB) error {
	const (
		legacyStmt = `UPDATE usage_records SET source = ? WHERE (source IS NULL OR source = '') AND user_id = ?`
		webStmt    = `UPDATE usage_records SET source = ? WHERE (source IS NULL OR source = '')`
	)

	// The narrower legacy update must run first: the catch-all statement below
	// has no user_id filter and would otherwise claim the legacy rows as web.
	legacyRes := db.Exec(legacyStmt, UsageSourceLegacy, "legacy-api-key")
	if legacyRes.Error != nil {
		return fmt.Errorf("backfill legacy usage source: %w", legacyRes.Error)
	}

	// Whatever is still unclassified at this point is treated as web traffic.
	webRes := db.Exec(webStmt, UsageSourceWeb)
	if webRes.Error != nil {
		return fmt.Errorf("backfill web usage source: %w", webRes.Error)
	}

	return nil
}
// GetAllUsage returns aggregated usage for all users (admin). Optional userID filter.
func GetAllUsage(db *gorm.DB, period, userID string) ([]UsageBucket, error) {
sqlite := isSQLiteDB(db)

View File

@@ -159,6 +159,48 @@ var _ = Describe("Usage", func() {
})
})
// Specs for auth.BackfillUsageSource: rows written before the source column
// existed (simulated here by inserting an empty source through raw SQL) must be
// classified, and rows that already have a source must be left alone.
Describe("Usage source backfill", func() {
	It("backfills 'web' for pre-feature rows", func() {
		db := testDB()
		rawDB, err := db.DB()
		Expect(err).ToNot(HaveOccurred())

		// Insert with raw SQL and an explicit empty source to mimic a row
		// written before the column was populated.
		_, err = rawDB.Exec(
			`INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, '', ?, ?, 0, 0, 0, 0)`,
			"user-x", "gpt-4", time.Now())
		Expect(err).ToNot(HaveOccurred())

		Expect(auth.BackfillUsageSource(db)).To(Succeed())

		var loaded auth.UsageRecord
		Expect(db.Where("user_id = ?", "user-x").First(&loaded).Error).To(Succeed())
		Expect(loaded.Source).To(Equal(auth.UsageSourceWeb))
	})

	It("backfills 'legacy' for pre-feature rows with legacy-api-key user_id", func() {
		db := testDB()
		rawDB, err := db.DB()
		Expect(err).ToNot(HaveOccurred())

		// Same pre-feature shape as above, but owned by the legacy API key user.
		_, err = rawDB.Exec(
			`INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, '', ?, ?, 0, 0, 0, 0)`,
			"legacy-api-key", "gpt-4", time.Now())
		Expect(err).ToNot(HaveOccurred())

		Expect(auth.BackfillUsageSource(db)).To(Succeed())

		var loaded auth.UsageRecord
		Expect(db.Where("user_id = ?", "legacy-api-key").First(&loaded).Error).To(Succeed())
		Expect(loaded.Source).To(Equal(auth.UsageSourceLegacy))
	})

	It("is idempotent on re-run", func() {
		db := testDB()

		// Seed a row that is already classified with a source the backfill
		// would never choose for this user_id. If any run overwrote classified
		// rows, this one would flip to UsageSourceWeb and fail the check below.
		Expect(db.Exec(
			`INSERT INTO usage_records (user_id, source, model, created_at, total_tokens, prompt_tokens, completion_tokens, duration) VALUES (?, ?, ?, ?, 0, 0, 0, 0)`,
			"user-classified", auth.UsageSourceLegacy, "gpt-4", time.Now(),
		).Error).To(Succeed())

		Expect(auth.BackfillUsageSource(db)).To(Succeed())
		Expect(auth.BackfillUsageSource(db)).To(Succeed())

		var loaded auth.UsageRecord
		Expect(db.Where("user_id = ?", "user-classified").First(&loaded).Error).To(Succeed())
		Expect(loaded.Source).To(Equal(auth.UsageSourceLegacy))
	})
})
Describe("UsageRecord with source fields", func() {
It("persists Source, APIKeyID, APIKeyName", func() {
db := testDB()