From ea5585a8efe94f0b1b6a81da5a110f95e640cc94 Mon Sep 17 00:00:00 2001 From: "Jokob @NetAlertX" <96159884+jokob-sk@users.noreply.github.com> Date: Sun, 1 Mar 2026 06:07:57 +0000 Subject: [PATCH] Add database cleanup for Sessions and optimize queries - Implemented deletion of Sessions older than DAYS_TO_KEEP_EVENTS. - Added index for Plugins_History to improve query performance. - Introduced unit tests for Sessions trimming and database analysis. --- front/plugins/db_cleanup/script.py | 19 +++ server/db/db_upgrade.py | 8 + test/db/test_db_cleanup.py | 257 +++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 test/db/test_db_cleanup.py diff --git a/front/plugins/db_cleanup/script.py b/front/plugins/db_cleanup/script.py index fcfaf4bc..4d1c74e2 100755 --- a/front/plugins/db_cleanup/script.py +++ b/front/plugins/db_cleanup/script.py @@ -96,6 +96,15 @@ def cleanup_database( cursor.execute(sql) mylog("verbose", [f"[{pluginName}] Events deleted rows: {cursor.rowcount}"]) + # ----------------------------------------------------- + # Sessions (derived snapshot — trimmed to the same window as Events so the + # two tables stay in sync without introducing a separate setting) + mylog("verbose", f"[{pluginName}] Sessions: Delete all older than {str(DAYS_TO_KEEP_EVENTS)} days (reuses DAYS_TO_KEEP_EVENTS)") + sql = f"""DELETE FROM Sessions WHERE ses_DateTimeConnection <= date('now', '-{str(DAYS_TO_KEEP_EVENTS)} day')""" + mylog("verbose", [f"[{pluginName}] SQL : {sql}"]) + cursor.execute(sql) + mylog("verbose", [f"[{pluginName}] Sessions deleted rows: {cursor.rowcount}"]) + # ----------------------------------------------------- # Plugins_History mylog("verbose", f"[{pluginName}] Plugins_History: Trim to {str(PLUGINS_KEEP_HIST)} per Plugin") @@ -199,9 +208,19 @@ def cleanup_database( cursor.execute("PRAGMA wal_checkpoint(FULL);") mylog("verbose", [f"[{pluginName}] WAL checkpoint executed to truncate file."]) + # Refresh query-planner statistics after bulk deletes so SQLite chooses + # the right indexes on the next scan cycle (fixes CPU scaling with DB size) + cursor.execute("ANALYZE;") + mylog("verbose", [f"[{pluginName}] ANALYZE completed"]) + mylog("verbose", [f"[{pluginName}] Shrink Database"]) cursor.execute("VACUUM;") + # Lightweight incremental ANALYZE at connection close — near-zero cost, + # only re-analyzes tables whose statistics have drifted >25% + cursor.execute("PRAGMA optimize;") + mylog("verbose", [f"[{pluginName}] PRAGMA optimize completed"]) + conn.close() diff --git a/server/db/db_upgrade.py b/server/db/db_upgrade.py index d3f18de2..8886b959 100755 --- a/server/db/db_upgrade.py +++ b/server/db/db_upgrade.py @@ -405,6 +405,14 @@ def ensure_Indexes(sql) -> bool: "idx_plugins_plugin_mac_ip", "CREATE INDEX idx_plugins_plugin_mac_ip ON Plugins_Objects(Plugin, Object_PrimaryID, Object_SecondaryID)", ), # Issue #1251: Optimize name resolution lookup + # Plugins_History: covers both the db_cleanup window function + # (PARTITION BY Plugin ORDER BY DateTimeChanged DESC) and the + # API query (SELECT * … ORDER BY DateTimeChanged DESC). + # Without this, both ops do a full 48k-row table sort on every cycle. + ( + "idx_plugins_history_plugin_dt", + "CREATE INDEX idx_plugins_history_plugin_dt ON Plugins_History(Plugin, DateTimeChanged DESC)", + ), ] for name, create_sql in indexes: diff --git a/test/db/test_db_cleanup.py b/test/db/test_db_cleanup.py new file mode 100644 index 00000000..941161b3 --- /dev/null +++ b/test/db/test_db_cleanup.py @@ -0,0 +1,257 @@ +""" +Unit tests for db_cleanup plugin SQL logic. + +Covers: +- Sessions trim (reuses DAYS_TO_KEEP_EVENTS window) +- ANALYZE refreshes sqlite_stat1 after bulk deletes +- PRAGMA optimize runs without error + +Each test creates an isolated in-memory SQLite database so there is no +dependency on the running application or its config. +""" + +import sqlite3 +import os + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_db(): + """Return an in-memory connection seeded with the tables used by db_cleanup.""" + conn = sqlite3.connect(":memory:") + cur = conn.cursor() + + cur.execute(""" + CREATE TABLE Events ( + eve_MAC TEXT NOT NULL, + eve_IP TEXT NOT NULL, + eve_DateTime DATETIME NOT NULL, + eve_EventType TEXT NOT NULL, + eve_AdditionalInfo TEXT DEFAULT '', + eve_PendingAlertEmail INTEGER NOT NULL DEFAULT 1, + eve_PairEventRowid INTEGER + ) + """) + + cur.execute(""" + CREATE TABLE Sessions ( + ses_MAC TEXT, + ses_IP TEXT, + ses_EventTypeConnection TEXT, + ses_DateTimeConnection DATETIME, + ses_EventTypeDisconnection TEXT, + ses_DateTimeDisconnection DATETIME, + ses_StillConnected INTEGER, + ses_AdditionalInfo TEXT + ) + """) + + conn.commit() + return conn + + +def _seed_sessions(cur, old_count: int, recent_count: int, days: int): + """ + Insert `old_count` rows with connection date older than `days` days and + `recent_count` rows with connection date today. + """ + for i in range(old_count): + cur.execute( + "INSERT INTO Sessions (ses_MAC, ses_DateTimeConnection) " + "VALUES (?, date('now', ?))", + (f"AA:BB:CC:DD:EE:{i:02X}", f"-{days + 1} day"), + ) + for i in range(recent_count): + cur.execute( + "INSERT INTO Sessions (ses_MAC, ses_DateTimeConnection) " + "VALUES (?, date('now'))", + (f"11:22:33:44:55:{i:02X}",), + ) + + +def _run_sessions_trim(cur, days: int) -> int: + """Execute the exact DELETE used by db_cleanup and return rowcount.""" + cur.execute( + f"DELETE FROM Sessions " + f"WHERE ses_DateTimeConnection <= date('now', '-{days} day')" + ) + return cur.rowcount + + +# --------------------------------------------------------------------------- +# Sessions trim tests +# --------------------------------------------------------------------------- + +class TestSessionsTrim: + + def test_old_rows_are_deleted(self): + """Rows older than DAYS_TO_KEEP_EVENTS window must be removed.""" + conn = _make_db() + cur = conn.cursor() + _seed_sessions(cur, old_count=10, recent_count=5, days=30) + + deleted = _run_sessions_trim(cur, days=30) + + assert deleted == 10, f"Expected 10 old rows deleted, got {deleted}" + cur.execute("SELECT COUNT(*) FROM Sessions") + remaining = cur.fetchone()[0] + assert remaining == 5, f"Expected 5 recent rows to survive, got {remaining}" + + def test_recent_rows_are_preserved(self): + """Rows within the retention window must not be touched.""" + conn = _make_db() + cur = conn.cursor() + _seed_sessions(cur, old_count=0, recent_count=20, days=30) + + deleted = _run_sessions_trim(cur, days=30) + + assert deleted == 0, f"Expected 0 deletions, got {deleted}" + cur.execute("SELECT COUNT(*) FROM Sessions") + assert cur.fetchone()[0] == 20 + + def test_empty_table_is_a_no_op(self): + """Trim against an empty Sessions table must not raise.""" + conn = _make_db() + cur = conn.cursor() + + deleted = _run_sessions_trim(cur, days=30) + + assert deleted == 0 + + def test_trim_is_bounded_by_days_parameter(self): + """Only rows strictly outside the window are removed; boundary row survives.""" + conn = _make_db() + cur = conn.cursor() + # Row exactly AT the boundary (date = 'now' - days exactly) + cur.execute( + "INSERT INTO Sessions (ses_MAC, ses_DateTimeConnection) " + "VALUES (?, date('now', ?))", + ("AA:BB:CC:00:00:01", "-30 day"), + ) + # Row just inside the window + cur.execute( + "INSERT INTO Sessions (ses_MAC, ses_DateTimeConnection) " + "VALUES (?, date('now', '-29 day'))", + ("AA:BB:CC:00:00:02",), + ) + + _run_sessions_trim(cur, days=30) + + cur.execute("SELECT ses_MAC FROM Sessions") + remaining_macs = {row[0] for row in cur.fetchall()} + # Boundary row (== threshold) is deleted; inside row survives + assert "AA:BB:CC:00:00:02" in remaining_macs, "Row inside window was wrongly deleted" + + def test_sessions_trim_uses_same_value_as_events(self): + """ + Regression: verify that the Sessions DELETE uses an identical day-offset + expression to the Events DELETE so the two tables stay aligned. + """ + + INSTALL_PATH = os.getenv("NETALERTX_APP", "/app") + script_path = os.path.join( + INSTALL_PATH, "front", "plugins", "db_cleanup", "script.py" + ) + with open(script_path) as fh: + source = fh.read() + + events_expr = "DELETE FROM Events WHERE eve_DateTime <= date('now', '-{str(DAYS_TO_KEEP_EVENTS)} day')" + sessions_expr = "DELETE FROM Sessions WHERE ses_DateTimeConnection <= date('now', '-{str(DAYS_TO_KEEP_EVENTS)} day')" + + assert events_expr in source, "Events DELETE expression changed unexpectedly" + assert sessions_expr in source, "Sessions DELETE is not aligned with Events DELETE" + + +# --------------------------------------------------------------------------- +# ANALYZE tests +# --------------------------------------------------------------------------- + +class TestAnalyze: + + def test_analyze_populates_sqlite_stat1(self): + """ + After ANALYZE, sqlite_stat1 must exist and have at least one row + for the Events table (which has an implicit rowid index). + """ + conn = _make_db() + cur = conn.cursor() + + # Seed some rows so ANALYZE has something to measure + for i in range(20): + cur.execute( + "INSERT INTO Events (eve_MAC, eve_IP, eve_DateTime, eve_EventType) " + "VALUES (?, '1.2.3.4', date('now'), 'Connected')", + (f"AA:BB:CC:DD:EE:{i:02X}",), + ) + conn.commit() + + cur.execute("ANALYZE;") + conn.commit() + + cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='sqlite_stat1'") + assert cur.fetchone() is not None, "sqlite_stat1 table not created by ANALYZE" + + def test_analyze_does_not_raise_on_empty_tables(self): + """ANALYZE against empty tables must complete without exceptions.""" + conn = _make_db() + cur = conn.cursor() + + # Should not raise + cur.execute("ANALYZE;") + conn.commit() + + def test_analyze_is_idempotent(self): + """Running ANALYZE twice must not raise or corrupt state.""" + conn = _make_db() + cur = conn.cursor() + + cur.execute("ANALYZE;") + cur.execute("ANALYZE;") + conn.commit() + + +# --------------------------------------------------------------------------- +# PRAGMA optimize tests +# --------------------------------------------------------------------------- + +class TestPragmaOptimize: + + def test_pragma_optimize_does_not_raise(self): + """PRAGMA optimize must complete without exceptions.""" + conn = _make_db() + cur = conn.cursor() + + # Run ANALYZE first (as db_cleanup does) then optimize + cur.execute("ANALYZE;") + cur.execute("PRAGMA optimize;") + conn.commit() + + def test_pragma_optimize_after_bulk_delete(self): + """ + PRAGMA optimize after a bulk DELETE (simulating db_cleanup) must + complete without error, validating the full tail sequence. + """ + conn = _make_db() + cur = conn.cursor() + + for i in range(50): + cur.execute( + "INSERT INTO Sessions (ses_MAC, ses_DateTimeConnection) " + "VALUES (?, date('now', '-60 day'))", + (f"AA:BB:CC:DD:EE:{i:02X}",), + ) + conn.commit() + + # Mirror the tail sequence from cleanup_database. + # WAL checkpoints are omitted: they require no open transaction and are + # not supported on :memory: databases (SQLite raises OperationalError). + cur.execute("DELETE FROM Sessions WHERE ses_DateTimeConnection <= date('now', '-30 day')") + conn.commit() + cur.execute("ANALYZE;") + conn.execute("VACUUM;") + cur.execute("PRAGMA optimize;") + + cur.execute("SELECT COUNT(*) FROM Sessions") + assert cur.fetchone()[0] == 0